diff --git a/CMakeLists.txt b/CMakeLists.txt
old mode 100755
new mode 100644
index 61a85bf4637f99157c7762b69e9d884edf90b874..634c581b203cbd77f1346a1689abc7bc7a1691cb
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,7 @@
 # Vladimir Klement
 # Jakub Klinkovsky
-cmake_minimum_required( VERSION 3.4 )
+cmake_minimum_required( VERSION 3.5.1 )
 project( tnl )
@@ -21,28 +21,25 @@ set( tnlVersion "0.1" )
 include( OptimizeForArchitecture )
-include( UseCodeCoverage )
 # Settings for debug/release version
-    set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Debug/src/Tests )
     set( debugExt -dbg )
-    set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Release/src/Tests )
 # set Debug/Release options
-set( CMAKE_CXX_FLAGS "-std=c++11 -Wall -Wno-unused-local-typedefs -Wno-unused-variable" )
+set( CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" )
 set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" )
 #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" )
@@ -54,7 +51,19 @@ set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_RELEASE "" )
 get_filename_component( CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME )
    message( "Intel compiler detected..."    )
+   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570")
+   #####
+   #  Ckeck for MIC 
+   #
+   if( WITH_MIC STREQUAL "yes" )
+       message( "Compile MIC support..."    )
+       set( MIC_CXX_FLAGS "-DHAVE_MIC")
+       # build all tests with MIC support
+       set( WITH_CUDA "no")
+   else()
+       set( MIC_CXX_FLAGS "")
+   endif( )	
@@ -67,12 +76,19 @@ if( WITH_CUDA STREQUAL "yes" )
         set(BUILD_SHARED_LIBS ON)
+        # Use the CUDA_HOST_COMPILER environment variable if the user specified it.
+            message( "-- Setting CUDA_HOST_COMPILER to '$ENV{CUDA_HOST_COMPILER}'" )
+        else()
+            message( "-- Setting CUDA_HOST_COMPILER to '${CMAKE_CXX_COMPILER}'" )
+        endif()
         # disable false compiler warnings
         #   reference for the -Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910
         #   list of possible tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr -Xcudafe "\"--diag_suppress=code_is_unreachable --diag_suppress=implicit_return_from_non_void_function\"")
-        #AddCompilerFlag( "-DHAVE_NOT_CXX11" ) # -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 " )
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "\"--diag_suppress=code_is_unreachable --diag_suppress=implicit_return_from_non_void_function\"")
         set( ALL_CUDA_ARCHS -gencode arch=compute_20,code=sm_20
                             -gencode arch=compute_30,code=sm_30
                             -gencode arch=compute_32,code=sm_32 
@@ -115,9 +131,6 @@ if( WITH_CUDA STREQUAL "yes" )
         # TODO: this is necessary only due to a bug in cmake
         set( CUDA_ADD_LIBRARY_OPTIONS -shared )
-        # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1)
-        set( CUDA_HOST_COMPILER "" )
         # Check for cuBLAS
     message(  "Missing header file sys/time.h" )
     set( HAVE_SYS_TIME_H "//#define HAVE_SYS_TIME_H 1" )
-    include_directories( ${SYS_TIME_INCLUDE_DIR}/tnl-${tnlVersion} )
+    #include_directories( ${SYS_TIME_INCLUDE_DIR} )
     set( HAVE_SYS_TIME_H "#define HAVE_SYS_TIME_H 1" )
     message( "Missing header file sys/time.h" )
     set( HAVE_SYS_RESOURCE_H "//#define HAVE_SYS_RESOURCE_H 1" )
-    include_directories( ${SYS_RESOURCE_INCLUDE_DIR}/tnl-${tnlVersion} )
+    #include_directories( ${SYS_RESOURCE_INCLUDE_DIR} )
     message( "Missing header file sys/time.h" )
     set( HAVE_SYS_IOCTL_H "//#define HAVE_SYS_IOCTL_H 1" )
-    include_directories( ${SYS_IOCTL_INCLUDE_DIR}/tnl-${tnlVersion} )
+    #include_directories( ${SYS_IOCTL_INCLUDE_DIR} )
     set( HAVE_SYS_IOCTL_H "#define HAVE_SYS_IOCTL_H 1" )
-   find_package( GTest )
-   if( GTEST_FOUND )
-   endif( GTEST_FOUND )
+   enable_testing()
+   # build gtest libs
+   include( BuildGtest )
+      # enable code coverage reports
+      include( UseCodeCoverage )
+   endif()
 endif( WITH_TESTS STREQUAL "yes" )
 find_package( PythonInterp 3 )
-set( CXX_TEST_FLAGS "-fprofile-arcs -ftest-coverage" )
-set( LD_TEST_FLAGS "-lgcov -coverage" )
 set( configDirectory \"${CMAKE_INSTALL_PREFIX}/share/tnl-${tnlVersion}/\")
 set( sourceDirectory \"${PROJECT_SOURCE_DIR}/\" )
 set( testsDirectory \"${PROJECT_TESTS_PATH}/\" )
@@ -342,7 +357,6 @@ endif( PYTHONINTERP_FOUND )
 #Nastavime cesty k hlavickovym souborum a knihovnam
 add_subdirectory( src )
 add_subdirectory( share )
 add_subdirectory( tests )
-add_subdirectory( examples )
+   add_subdirectory( examples )
 set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Template Numerical Library")
diff --git a/INSTALL b/INSTALL
index ab759d74a7ea9dc57225f4f0fac1479dc58e7ac7..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
@@ -1 +0,0 @@
\ No newline at end of file
diff --git a/README b/README
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f4804b5e42323862dc09b426cf601af825ae6bfd 100644
--- a/README
+++ b/README
@@ -0,0 +1,80 @@
+    Requirements:
+    To install TNL, you need:
+    cmake 3.4 or later (https://cmake.org/download/)
+    GNU g++ 4.8 or later (https://gcc.gnu.org/)
+    CUDA 8.0 or later (https://developer.nvidia.com/cuda-downloads)
+    For image processing problems, you may optionally install:
+    DCMTK (http://dicom.offis.de/dcmtk.php.en)
+    libpng (http://www.libpng.org/pub/png/libpng.html)
+    libjpeg (http://libjpeg.sourceforge.net/)
+    The latest release of TNL can be downloaded as:
+    wget tnl-project.org/data/src/tnl-0.1.tar.bz2
+    Unpack it as:
+    tar xvf tnl-0.1.tar.bz2
+    cd tnl-0.1
+    Executing command
+    ./install
+    will install TNL to a folder ${HOME}/.local . You may change it by
+    ./install --prefix=<TNL prefix>
+    During the installation, TNL fetches latest version of Gtest and install it only 
+    locally to sub-folders Debug and Release. At the end of the installation, the
+    script is checking if the prefix folder is visible to your bash and your linker.
+    If not, it informs you how to change your ${HOME}/.bashrc file to fix it.
+How to write a simple solver
+To implement your own solver:
+    Create and go to your working directory
+    mkdir MyProblem
+    cd Myproblem
+    Execute a command tnl-quickstart
+    tnl-quickstart
+    Answer the questions as, for example, follows
+    TNL Quickstart -- solver generator
+    ----------------------------------
+    Problem name:My Problem
+    Problem class base name (base name acceptable in C++ code):MyProblem
+    Operator name:Laplace
+    Write your numerical scheme by editing a file
+    Laplace_impl.h
+    on lines:
+        34, 141 and 265 for 1D, 2D and 3D problem respectively with explicit time discretization
+        101, 211 and 332 for 1D, 2D and 3D problem respectively with (semi-)implicit time discretization
+    Compile the program by executing
+    make
+    for CPU version only or 
+    make WITH_CUDA=yes
+    for a solver running on both CPU and GPU. Run it on your favourite HW architecture by executing
+    ./MyProblem
+    and following the printed help.
diff --git a/build b/build
index bd644b6a111fd298803ba4e281747b28e0c27307..e3fbfe5e470bb80e09038269423ea2cfe2a0d798 100755
--- a/build
+++ b/build
@@ -2,35 +2,51 @@
 for option in "$@"
     case $option in
         --prefix=*                       ) PREFIX="${option#*=}" ;;
+        --install=*                      ) INSTALL="${option#*=}" ;;
+        --root-dir=*                     ) ROOT_DIR="${option#*=}" ;;
+        --dcmtk-dir=*                    ) DCMTK_DIR="${option#*=}" ;;
         --build=*                        ) BUILD="${option#*=}" ;;
+        --build-jobs=*                   ) BUILD_JOBS="${option#*=}" ;;
+        --cmake=*                        ) CMAKE="${option#*=}" ;;
+        --cmake-only=*                   ) CMAKE_ONLY="${option#*=}" ;;
+        --verbose                        ) VERBOSE="VERBOSE=1" ;;
+        --help                           ) HELP="yes" ;;
         --with-clang=*                   ) WITH_CLANG="${option#*=}" ;;
-        --with-tests=*                   ) WITH_TESTS="${option#*=}" ;;
+        --with-mic=*                     ) WITH_MIC="${option#*=}" ;;
         --with-cuda=*                    ) WITH_CUDA="${option#*=}" ;;
         --with-cuda-arch=*               ) WITH_CUDA_ARCH="${option#*=}";;
+        --with-tests=*                   ) WITH_TESTS="${option#*=}" ;;
+        --with-coverage=*                ) WITH_COVERAGE="${option#*=}" ;;
+        --with-examples=*                ) WITH_EXAMPLES="${option#*=}" ;;
         --with-templates-instantiation=* ) WITH_TEMPLATE_INSTANTIATION="${option#*=}" ;;
         --instantiate-long-int=*         ) INSTANTIATE_LONG_INT="${option#*=}" ;;
         --instantiate-int=*              ) INSTANTIATE_INT="${option#*=}" ;;
@@ -44,13 +60,6 @@ do
                                            WITH_CUDA_ARCH="auto" ;;
         --optimize-vector-host-operations=* ) OPTIMIZED_VECTOR_HOST_OPERATIONS="yes" ;;
-        --with-cmake=*                   ) CMAKE="${option#*=}" ;;
-        --build-jobs=*                   ) BUILD_JOBS="${option#*=}" ;;
-        --cmake-only=*                   ) CMAKE_ONLY="${option#*=}" ;;
-        --verbose                        ) VERBOSE="VERBOSE=1" ;;
-        --root-dir=*                     ) ROOT_DIR="${option#*=}" ;;
-        --dcmtk-dir=*                    ) DCMTK_DIR="${option#*=}" ;;
-        --help                           ) HELP="yes" ;;
         *                                ) 
            echo "Unknown option ${option}. Use --help for more information."
            exit 1 ;;
@@ -61,15 +70,18 @@ if test ${HELP} = "yes";
     echo "TNL build options:"
     echo ""
-    echo "   --prefix=PATH                         Prefix for the installation directory. ${HOME}/local by default."
     echo "   --build=Debug/Release                 Build type."
-    echo "   --with-tests=yes/no                   Enable unit tests. 'yes' by default (libcppunit-dev is required)."
+    echo "   --build-jobs=NUM                      Number of processes to be used for the build. It is set to the number of available CPU cores by default."
+    echo "   --prefix=PATH                         Prefix for the installation directory. ${HOME}/local by default."
+    echo "   --install=yes/no                      Enables the installation of TNL files."
+    echo "   --with-mic=yes/no                     Enable MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)."
     echo "   --with-cuda=yes/no                    Enable CUDA. 'yes' by default (CUDA Toolkit is required)."
-    echo "   --with-cuda-arch=all/auto/30/35/...   Choose CUDA architecture."   
-    echo "   --with-templates-instantiation=yes/no Some TNL templates are precompiled during the build. 'yes' by default."
-    echo "   --full-build                          Instantiate all -- long int indexing, float and long double floating point arithmetics."
-    echo "   --with-cmake=CMAKE                    Path to cmake. 'cmake' by default."
-    echo "   --build-jobs=NUM                      Number of processes to be used for the build. It is set to a number of CPU cores by default."
+    echo "   --with-cuda-arch=all/auto/30/35/...   Choose CUDA architecture. 'auto' by default."
+    echo "   --with-tests=yes/no                   Enable unit tests. 'yes' by default."
+    echo "   --with-coverage=yes/no                Enable code coverage reports for unit tests. 'no' by default (lcov is required)."
+    echo "   --with-examples=yes/no                Compile the 'examples' directory. 'yes' by default."
+    echo "   --with-templates-instantiation=yes/no Precompiles some TNL templates during the build. 'no' by default."
+    echo "   --cmake=CMAKE                         Path to cmake. 'cmake' by default."
     echo "   --verbose                             It enables verbose build."
     echo "   --root-dir=PATH                       Path to the TNL source code root dir."
     echo "   --dcmtk-dir=PATH                      Path to the DCMTK (Dicom Toolkit) root dir."
@@ -89,10 +101,12 @@ echo "Configuring ${BUILD} $TARGET ..."
+         -DWITH_MIC=${WITH_MIC} \
          -DWITH_CUDA=${WITH_CUDA} \
          -DWITH_TESTS=${WITH_TESTS} \
-         -DPETSC_DIR=${PETSC_DIR} \
          -DDCMTK_DIR=${DCMTK_DIR} \
@@ -109,7 +123,7 @@ fi
 if test ${CMAKE_ONLY} = "yes";
-    exit 1
+    exit 0
 if [[ -n ${BUILD_JOBS} ]]; then
@@ -128,14 +142,21 @@ else
     echo "Building ${BUILD} $TARGET ..."
-make ${VERBOSE}
+if [[ "$INSTALL" == "yes" ]]; then
+   # install implies all
+   make_target="install"
+   make_target="all"
+make ${VERBOSE} $make_target
 if test $? != 0; then
     echo "Error: Build process failed."
     exit 1
-if test WITH_TESTS = "yes";
+if test ${WITH_TESTS} = "yes";
     make test
     if test $? != 0; then
diff --git a/cmake/BuildGtest.cmake b/cmake/BuildGtest.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..5567e76c881bdd5359cffeb2c9fa07a40ec40231
--- /dev/null
+++ b/cmake/BuildGtest.cmake
@@ -0,0 +1,45 @@
+# Gtest developers recommend to build the gtest libraries directly from
+# the projects' build systems, see
+# https://github.com/google/googletest/tree/master/googletest#incorporating-into-an-existing-cmake-project
+#find_package( GTest )
+#endif( GTEST_FOUND )
+# compatibility with the GTest package
+set( GTEST_BOTH_LIBRARIES gtest gtest_main )
+# Download and unpack googletest at configure time
+configure_file(cmake/Gtest.cmake.in googletest-download/CMakeLists.txt)
+ WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
+ message(FATAL_ERROR "CMake step for googletest failed: ${result}")
+execute_process(COMMAND ${CMAKE_COMMAND} --build .
+ WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
+ message(FATAL_ERROR "Build step for googletest failed: ${result}")
+# Prevent overriding the parent project's compiler/linker
+# settings on Windows
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+# Add googletest directly to our build. This defines
+# the gtest and gtest_main targets.
+                ${CMAKE_BINARY_DIR}/googletest-build)
+# The gtest/gtest_main targets carry header search path
+# dependencies automatically when using CMake 2.8.11 or
+# later. Otherwise we have to add them here ourselves.
+    include_directories("${gtest_SOURCE_DIR}/include")
diff --git a/cmake/Gtest.cmake.in b/cmake/Gtest.cmake.in
new file mode 100644
index 0000000000000000000000000000000000000000..762cff74ab9f856066a69c97c24fe69bfd076f66
--- /dev/null
+++ b/cmake/Gtest.cmake.in
@@ -0,0 +1,22 @@
+# vim: ft=cmake
+# This is a separate template for CMakeLists.txt to build gtest as a separate project
+# Copied from upstream documentation:
+# https://github.com/google/googletest/tree/master/googletest#incorporating-into-an-existing-cmake-project
+cmake_minimum_required(VERSION 2.8.2)
+project(googletest-download NONE)
+  GIT_REPOSITORY    https://github.com/google/googletest.git
+  GIT_TAG           master
+  SOURCE_DIR        "${CMAKE_BINARY_DIR}/googletest-src"
+  BINARY_DIR        "${CMAKE_BINARY_DIR}/googletest-build"
+  TEST_COMMAND      ""
diff --git a/cmake/UseCodeCoverage.cmake b/cmake/UseCodeCoverage.cmake
index d66c9a31a224ff35f96df47720eb3d2841812284..0e4b7f46f5c560a6169fdcbf72473517a5973830 100644
--- a/cmake/UseCodeCoverage.cmake
+++ b/cmake/UseCodeCoverage.cmake
@@ -1,31 +1,25 @@
-OPTION( ENABLE_CODECOVERAGE "Enable code coverage testing support" )
-        message( WARNING "Code coverage results with an optimised (non-Debug) build may be misleading" )
-    endif ( NOT CMAKE_BUILD_TYPE STREQUAL "Debug" )
-        set( CODECOV_OUTPUTFILE cmake_coverage.output )
-        set( CODECOV_HTMLOUTPUTDIR coverage_results )
-        find_program( CODECOV_GCOV gcov )
-        find_program( CODECOV_LCOV lcov )
-        find_program( CODECOV_GENHTML genhtml )
-        add_definitions( -fprofile-arcs -ftest-coverage )
-        link_libraries( gcov )
-        add_custom_target( coverage_init ALL ${CODECOV_LCOV} --base-directory .  --directory ${CMAKE_BINARY_DIR} --output-file ${CODECOV_OUTPUTFILE} --capture --initial )
-        add_custom_target( coverage ${CODECOV_LCOV} --base-directory .  --directory ${CMAKE_BINARY_DIR} --output-file ${CODECOV_OUTPUTFILE} --capture COMMAND genhtml -o ${CODECOV_HTMLOUTPUTDIR} ${CODECOV_OUTPUTFILE} )
\ No newline at end of file
+    message( WARNING "Code coverage results with an optimised (non-Debug) build may be misleading" )
+    set( CODECOV_OUTPUTFILE cmake_coverage.output )
+    set( CODECOV_HTMLOUTPUTDIR coverage_results )
+    find_program( CODECOV_GCOV gcov )
+    find_program( CODECOV_LCOV lcov )
+    find_program( CODECOV_GENHTML genhtml )
+    add_definitions( -fprofile-arcs -ftest-coverage )
+    link_libraries( gcov )
+    add_custom_target( coverage_init ALL ${CODECOV_LCOV} --base-directory .  --directory ${CMAKE_SOURCE_DIR} --no-external --output-file ${CODECOV_OUTPUTFILE} --capture --initial --quiet )
+    add_custom_target( coverage ${CODECOV_LCOV} --base-directory .  --directory ${CMAKE_SOURCE_DIR} --no-external --output-file ${CODECOV_OUTPUTFILE} --capture --quiet COMMAND genhtml --quiet -o ${CODECOV_HTMLOUTPUTDIR} ${CODECOV_OUTPUTFILE} )
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/examples/heat-equation/CMakeLists.txt b/examples/heat-equation/CMakeLists.txt
old mode 100755
new mode 100644
index 4b5bc34dba38e819d901cf46f311283c645d9c1a..9614bb21a6c453f80c6222b51899b7b33e74150d
--- a/examples/heat-equation/CMakeLists.txt
+++ b/examples/heat-equation/CMakeLists.txt
@@ -12,8 +12,9 @@ IF( BUILD_CUDA )
 ELSE(  BUILD_CUDA )               
    ADD_EXECUTABLE(tnl-heat-equation${debugExt} tnl-heat-equation.cpp)     
    ADD_EXECUTABLE(tnl-heat-equation-eoc-test${debugExt} tnl-heat-equation-eoc.cpp)   
-   target_link_libraries (tnl-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )
+   target_link_libraries (tnl-heat-equation${debugExt} tnl${debugExt}-${tnlVersion})
    target_link_libraries (tnl-heat-equation-eoc-test${debugExt} tnl${debugExt}-${tnlVersion} )
+   TARGET_COMPILE_DEFINITIONS( tnl-heat-equation${debugExt} PUBLIC ${MIC_CXX_FLAGS} )
diff --git a/examples/heat-equation/tnl-heat-equation-eoc.h b/examples/heat-equation/tnl-heat-equation-eoc.h
index e228a8ca98ed78b49b98537d61945616f82f7c99..f9876fb9adf66479282f1223bd48fb10da759ae7 100644
--- a/examples/heat-equation/tnl-heat-equation-eoc.h
+++ b/examples/heat-equation/tnl-heat-equation-eoc.h
@@ -53,16 +53,16 @@ class heatEquationSetter
    typedef Device DeviceType;
    typedef Index IndexType;
-   typedef Containers::StaticVector< MeshType::meshDimension, Real > Point;
+   typedef Containers::StaticVector< MeshType::getMeshDimension(), Real > Point;
    static bool run( const Config::ParameterContainer& parameters )
-      enum { Dimension = MeshType::meshDimension };
+      enum { Dimension = MeshType::getMeshDimension() };
       typedef Operators::LinearDiffusion< MeshType, Real, Index > ApproximateOperator;
       typedef Operators::ExactLinearDiffusion< Dimension > ExactOperator;
-      typedef Functions::TestFunction< MeshType::meshDimension, Real, Device > TestFunction;
+      typedef Functions::TestFunction< MeshType::getMeshDimension(), Real, Device > TestFunction;
       typedef HeatEquationEocRhs< ExactOperator, TestFunction > RightHandSide;
-      typedef Containers::StaticVector < MeshType::meshDimension, Real > Point;
+      typedef Containers::StaticVector < MeshType::getMeshDimension(), Real > Point;
       typedef Operators::DirichletBoundaryConditions< MeshType, TestFunction, Dimension, Real, Index > BoundaryConditions;
       typedef HeatEquationEocProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator > Solver;
       SolverStarter solverStarter;
diff --git a/examples/heat-equation/tnl-heat-equation.h b/examples/heat-equation/tnl-heat-equation.h
index 3b289a8e614beebf317f6cfd4bc1011dfd92f390..1a0041f93e3f6edeb73b8bfe9d632ed9538f9f41 100644
--- a/examples/heat-equation/tnl-heat-equation.h
+++ b/examples/heat-equation/tnl-heat-equation.h
@@ -68,7 +68,7 @@ class heatEquationSetter
    static bool run( const Config::ParameterContainer& parameters )
-      enum { Dimension = MeshType::meshDimension };
+      enum { Dimension = MeshType::getMeshDimension() };
       typedef Operators::LinearDiffusion< MeshType, Real, Index > ApproximateOperator;
       typedef Functions::Analytic::Constant< Dimension, Real > RightHandSide;
diff --git a/examples/inviscid-flow/CMakeLists.txt b/examples/inviscid-flow/CMakeLists.txt
index 634cce16141410c692254c2b7385b2cac5acd5a7..19f8d03003c876c4c510a7d5ed5c616a3e2e0888 100644
--- a/examples/inviscid-flow/CMakeLists.txt
+++ b/examples/inviscid-flow/CMakeLists.txt
@@ -6,19 +6,19 @@ set( tnl_inviscid_flow_SOURCES
      euler.cu )
-   CUDA_ADD_EXECUTABLE(tnl-euler-2d${debugExt} euler.cu)
-   target_link_libraries (tnl-euler-2d${debugExt} tnl${debugExt}-${tnlVersion}  ${CUSPARSE_LIBRARY} )
+   CUDA_ADD_EXECUTABLE(tnl-euler${debugExt} euler.cu)
+   target_link_libraries (tnl-euler${debugExt} tnl${debugExt}-${tnlVersion}  ${CUSPARSE_LIBRARY} )
 ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE(tnl-euler-2d${debugExt} euler.cpp)     
-   target_link_libraries (tnl-euler-2d${debugExt} tnl${debugExt}-${tnlVersion} )
+   ADD_EXECUTABLE(tnl-euler${debugExt} euler.cpp)     
+   target_link_libraries (tnl-euler${debugExt} tnl${debugExt}-${tnlVersion} )
-INSTALL( TARGETS tnl-euler-2d${debugExt}
+INSTALL( TARGETS tnl-euler${debugExt}
 INSTALL( FILES run-euler
-         DESTINATION share/tnl-${tnlVersion}/examples/inviscid-flow-2d )
+         DESTINATION share/tnl-${tnlVersion}/examples/inviscid-flow )
diff --git a/examples/inviscid-flow/LaxFridrichsContinuity.h b/examples/inviscid-flow/LaxFridrichsContinuity.h
index ebc7f8b07e5eb7df6ca819126effd68de8714420..657230695a5ec9cf84aaa84cd8b82d65e572df90 100644
--- a/examples/inviscid-flow/LaxFridrichsContinuity.h
+++ b/examples/inviscid-flow/LaxFridrichsContinuity.h
@@ -109,14 +109,14 @@ class LaxFridrichsContinuity< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Re
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
+         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
          const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
          const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
          return 1.0 / 2.0 * this->tau * this->artificialViscosity * ( u[ west ] - 2.0 * u[ center ]  + u[ east ] ) 
@@ -168,18 +168,18 @@ class LaxFridrichsContinuity< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Re
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
          const RealType& velocity_x_west = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
          const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
          const RealType& velocity_y_north = this->velocity.template getData< DeviceType >()[ 1 ].template getData< DeviceType >()[ north ];
@@ -235,21 +235,21 @@ class LaxFridrichsContinuity< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Re
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1,  0,  0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >(); 
          const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType& velocity_x_west  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ west ];
          const RealType& velocity_x_east  = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
diff --git a/examples/inviscid-flow/LaxFridrichsEnergy.h b/examples/inviscid-flow/LaxFridrichsEnergy.h
index fa0438f8c152007cbd7053d1b185abb3322801d0..d0af1de01db93d20a7ef800003b15b39228b9718 100644
--- a/examples/inviscid-flow/LaxFridrichsEnergy.h
+++ b/examples/inviscid-flow/LaxFridrichsEnergy.h
@@ -111,14 +111,14 @@ class LaxFridrichsEnergy< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
+         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
          const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
          const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
          const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
@@ -176,17 +176,17 @@ class LaxFridrichsEnergy< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
          const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
          const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
          const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
@@ -249,20 +249,20 @@ class LaxFridrichsEnergy< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0,  0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1,  0 >(); 
          const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts< 0,  0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType& pressure_west  = this->pressure.template getData< DeviceType >()[ west ];
          const RealType& pressure_east  = this->pressure.template getData< DeviceType >()[ east ];
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumX.h b/examples/inviscid-flow/LaxFridrichsMomentumX.h
index c63d155428392ed3090179f38f18ccb2fe938943..e054ad6ad261fbf53ef5c2e3a41475be3d992fd8 100644
--- a/examples/inviscid-flow/LaxFridrichsMomentumX.h
+++ b/examples/inviscid-flow/LaxFridrichsMomentumX.h
@@ -62,14 +62,14 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
+         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
          const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
          const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
          const RealType& velocity_x_east = this->velocity.template getData< DeviceType >()[ 0 ].template getData< DeviceType >()[ east ];
@@ -134,18 +134,18 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
          const RealType& pressure_west = this->pressure.template getData< DeviceType >()[ west ];
          const RealType& pressure_east = this->pressure.template getData< DeviceType >()[ east ];
@@ -215,20 +215,20 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0,  0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1,  0 >(); 
          const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts< 0,  0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType& pressure_west  = this->pressure.template getData< DeviceType >()[ west ];
          const RealType& pressure_east  = this->pressure.template getData< DeviceType >()[ east ];
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumY.h b/examples/inviscid-flow/LaxFridrichsMomentumY.h
index 0a2847c6356b81cf443e945bd9698dfdcaee3221..ddf7b022c31831bde9c0c0d877883db63ec697cd 100644
--- a/examples/inviscid-flow/LaxFridrichsMomentumY.h
+++ b/examples/inviscid-flow/LaxFridrichsMomentumY.h
@@ -62,9 +62,9 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         //const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+         //const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
          return 0.0;
@@ -123,18 +123,18 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
          const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
          const RealType& pressure_south = this->pressure.template getData< DeviceType >()[ south ];         
@@ -204,20 +204,20 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0,  0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1,  0 >(); 
          const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts< 0,  0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType& pressure_north = this->pressure.template getData< DeviceType >()[ north ];
          const RealType& pressure_south = this->pressure.template getData< DeviceType >()[ south ];
diff --git a/examples/inviscid-flow/LaxFridrichsMomentumZ.h b/examples/inviscid-flow/LaxFridrichsMomentumZ.h
index a28e18156f5dff70c25111c8b6d73fc73d89e667..7f9213c4d512b79202734f10033d648a0b8cdeeb 100644
--- a/examples/inviscid-flow/LaxFridrichsMomentumZ.h
+++ b/examples/inviscid-flow/LaxFridrichsMomentumZ.h
@@ -62,9 +62,9 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         //const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+         //const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
          return 0.0;
@@ -123,9 +123,9 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         //const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+         //const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
          return 0.0;
@@ -184,20 +184,20 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0,  0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1,  0 >(); 
          const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts< 0,  0, -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType& pressure_up    = this->pressure.template getData< DeviceType >()[ up ];
          const RealType& pressure_down  = this->pressure.template getData< DeviceType >()[ down ];
diff --git a/examples/inviscid-flow/eulerRhs.h b/examples/inviscid-flow/eulerRhs.h
index 97dc9f07b391dc52020094be9d9eed279cbd1562..51d4e024398d579f49c158292e2890536a1e319c 100644
--- a/examples/inviscid-flow/eulerRhs.h
+++ b/examples/inviscid-flow/eulerRhs.h
@@ -6,7 +6,7 @@
 namespace TNL {
 template< typename Mesh, typename Real >class eulerRhs
-  : public Functions::Domain< Mesh::meshDimension, Functions::MeshDomain > 
+  : public Functions::Domain< Mesh::getMeshDimension(), Functions::MeshDomain > 
diff --git a/examples/mean-curvature-flow/CMakeLists.txt b/examples/mean-curvature-flow/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h b/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h
index f978f811af95fb7d5f66150e95cf8b8acff103e0..4d852a7d8358666d66b4482232649616cb9b5a32 100644
--- a/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h
+++ b/examples/mean-curvature-flow/tnl-mean-curvature-flow-eoc.h
@@ -70,7 +70,7 @@ class meanCurvatureFlowEocSetter
    typedef Index IndexType;
    typedef typename MeshType::PointType Point;
-   enum { Dimension = MeshType::meshDimension };
+   enum { Dimension = MeshType::getMeshDimension() };
    static bool run( const Config::ParameterContainer& parameters )
@@ -79,9 +79,9 @@ class meanCurvatureFlowEocSetter
       typedef FiniteVolumeNonlinearOperator<MeshType, OperatorQ, Real, Index > NonlinearOperator;
       typedef NonlinearDiffusion< MeshType, NonlinearOperator, Real, Index > ApproximateOperator;
       typedef ExactNonlinearDiffusion< ExactGradientNorm< Dimension >, Dimension > ExactOperator;
-      typedef TestFunction< MeshType::meshDimension, Real, Device > TestFunction;
+      typedef TestFunction< MeshType::getMeshDimension(), Real, Device > TestFunction;
       typedef MeanCurvatureFlowEocRhs< ExactOperator, TestFunction, Dimension > RightHandSide;
-      typedef StaticVector < MeshType::meshDimension, Real > Point;
+      typedef StaticVector < MeshType::getMeshDimension(), Real > Point;
       typedef DirichletBoundaryConditions< MeshType, TestFunction, Dimension, Real, Index > BoundaryConditions;
       typedef MeanCurvatureFlowEocProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator > Solver;
       SolverStarter solverStarter;
diff --git a/examples/mean-curvature-flow/tnl-mean-curvature-flow.h b/examples/mean-curvature-flow/tnl-mean-curvature-flow.h
index 6a28b84210db9432d0b302b9f7f09aee6b160869..f2abd1671010c14207818909c0ca4c6e5e82b792 100644
--- a/examples/mean-curvature-flow/tnl-mean-curvature-flow.h
+++ b/examples/mean-curvature-flow/tnl-mean-curvature-flow.h
@@ -71,7 +71,7 @@ class meanCurvatureFlowSetter
    typedef Index IndexType;
    typedef typename MeshType::PointType Point;
-   enum { Dimension = MeshType::meshDimension };
+   enum { Dimension = MeshType::getMeshDimension() };
    static bool run( const Config::ParameterContainer& parameters )
@@ -102,7 +102,7 @@ class meanCurvatureFlowSetter
       typedef OneSidedNonlinearDiffusion< MeshType, NonlinearOperator, Real, Index > ApproximateOperator;
       typedef Constant< Dimension, Real > RightHandSide;
-      typedef StaticVector< MeshType::meshDimension, Real > Point;
+      typedef StaticVector< MeshType::getMeshDimension(), Real > Point;
       String boundaryConditionsType = parameters.getParameter< String >( "boundary-conditions-type" );
       if( parameters.checkParameter( "boundary-conditions-constant" ) )
diff --git a/examples/narrow-band/CMakeLists.txt b/examples/narrow-band/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/examples/narrow-band/main.h b/examples/narrow-band/main.h
index 47d8648bbf7646cd9580134f55be6002359cf21e..e9f55656f1128eb268214eb85a4bc7fe26ec5773 100644
--- a/examples/narrow-band/main.h
+++ b/examples/narrow-band/main.h
@@ -47,26 +47,26 @@ int main( int argc, char* argv[] )
    if(dim == 2)
-		tnlNarrowBand<tnlGrid<2,double,tnlHost, int>, double, int> solver;
+		tnlNarrowBand<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver;
 			cerr << "Solver failed to initialize." << endl;
 			return EXIT_FAILURE;
-		checkCudaDevice;
 	   cout << "-------------------------------------------------------------" << endl;
 	   cout << "Starting solver..." << endl;
 //   else if(dim == 3)
 //   {
-//		tnlNarrowBand<tnlGrid<3,double,tnlHost, int>, double, int> solver;
+//		tnlNarrowBand<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver;
 //		if(!solver.init(parameters))
 //	   {
 //			cerr << "Solver failed to initialize." << endl;
 //			return EXIT_FAILURE;
 //	   }
-//		checkCudaDevice;
 //	   cout << "-------------------------------------------------------------" << endl;
 //	   cout << "Starting solver..." << endl;
 //	   solver.run();
diff --git a/examples/narrow-band/tnlNarrowBand.h b/examples/narrow-band/tnlNarrowBand.h
index e2817424a1feb0f24bcbf8a759d37e8c9697977d..7d3d19bc03b43247f735cf04c5c82368360a748d 100644
--- a/examples/narrow-band/tnlNarrowBand.h
+++ b/examples/narrow-band/tnlNarrowBand.h
@@ -17,10 +17,10 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
 #include <functions/tnlMeshFunction.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
 #include <limits.h>
@@ -55,7 +55,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -135,7 +135,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
diff --git a/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
index e532bca7774c9c42648bf43f648a07ebbf20eb78..310d0fb239260fcc740a0f2bc56b37a36662a2f6 100644
--- a/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
+++ b/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
@@ -159,7 +159,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
-	checkCudaDevice;
 	int n = Mesh.getDimensions().x();
@@ -168,17 +168,17 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 	dim3 numBlocks2(statusGridSize ,statusGridSize);
-	checkCudaDevice;
-	checkCudaDevice;
 	/*dim3 threadsPerBlock(16, 16);
 	dim3 numBlocks(n/16 + 1 ,n/16 +1);*/
-	checkCudaDevice;
 	cout << "Solver initialized." << endl;
@@ -209,7 +209,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r
 	cout << "Hi!" << endl;
-	checkCudaDevice;
 	cout << "Hi2!" << endl;
 	while(time < finalTime)
@@ -218,30 +218,30 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r
-		checkCudaDevice;
 		time += tau;
 		cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost);
-		checkCudaDevice;
 		if(reinit != 0 /*&& time != finalTime */)
 			cout << time << endl;
-			checkCudaDevice;
-			checkCudaDevice;
-			checkCudaDevice;
-			checkCudaDevice;
@@ -277,31 +277,31 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
 	if(cudaStatusVector[subgridID] != 0 && i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y())
-		tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+		tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 		Real value = cudaDofVector2[Entity.getIndex()];
 		Real a,b, tmp;
 		if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9))*/ )
-			a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+			a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 		else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17))*/ )
-			a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+			a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-			a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-					 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+			a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+					 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 		if( j == 0 /*|| (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3))*/ )
-			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 		else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5)) */)
-			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-			b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-					 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+			b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+					 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -317,7 +317,7 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
@@ -346,11 +346,11 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 	int i = threadIdx.x + blockDim.x*blockIdx.x;
 	int j = blockDim.y*blockIdx.y + threadIdx.y;
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	int gid = Entity.getIndex();
@@ -394,18 +394,18 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //	{
 //		if(cudaDofVector[Entity.getIndex()] > 0)
 //		{
-//			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1111(i,j);
 //					else
 //						setupSquare1110(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1101(i,j);
 //					else
 //						setupSquare1100(i,j);
@@ -413,16 +413,16 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //			}
 //			else
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1011(i,j);
 //					else
 //						setupSquare1010(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1001(i,j);
 //					else
 //						setupSquare1000(i,j);
@@ -431,18 +431,18 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //		}
 //		else
 //		{
-//			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0111(i,j);
 //					else
 //						setupSquare0110(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0101(i,j);
 //					else
 //						setupSquare0100(i,j);
@@ -450,16 +450,16 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //			}
 //			else
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0011(i,j);
 //					else
 //						setupSquare0010(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0001(i,j);
 //					else
 //						setupSquare0000(i,j);
@@ -497,7 +497,7 @@ Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: f
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
@@ -603,7 +603,7 @@ __global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double
-__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 	__shared__ double u0;
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
@@ -641,7 +641,7 @@ __global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
 // run this with one thread per block
-__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 //	printf("Hello\n");
 	if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1)
@@ -670,7 +670,7 @@ __global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, in
-__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, double tau)
+__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau)
 	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x;
 	int i = threadIdx.x + blockIdx.x*blockDim.x;
@@ -688,44 +688,44 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
 		if(status != 0)
-			tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
+			tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
-			tnlNeighbourGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+			tnlNeighborGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 			double value = solver->cudaDofVector2[Entity.getIndex()];
 			double xf,xb,yf,yb, grad, fu, a,b;
 			a = b = 0.0;
 			if( i == 0 || (threadIdx.x == 0 && !(status & 9)) )
-				xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
-				xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
+				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
+				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
 			else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) )
-				xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()] - value;
+				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
+				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()] - value;
-				xb =  value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
+				xb =  value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
+				xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
 			if( j == 0 || (threadIdx.y == 0 && !(status & 3)) )
-				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] ;
-				yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
+				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] ;
+				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
 			else if( j == solver->Mesh.getDimensions().y() - 1  || (threadIdx.y == blockDim.y - 1 && !(status & 5)) )
-				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
-				yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()] - value;
+				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
+				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()] - value;
-				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
-				yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
+				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()];
+				yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
@@ -837,14 +837,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -856,14 +856,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -875,18 +875,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -895,9 +895,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -908,18 +908,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -928,9 +928,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -941,18 +941,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -961,9 +961,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -974,17 +974,17 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -994,9 +994,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1008,18 +1008,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -1028,9 +1028,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1041,18 +1041,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -1061,9 +1061,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1074,18 +1074,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -1094,9 +1094,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1107,17 +1107,17 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -1127,9 +1127,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1144,18 +1144,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -1164,9 +1164,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1177,18 +1177,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -1197,9 +1197,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1210,14 +1210,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1234,18 +1234,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -1254,9 +1254,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1267,18 +1267,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -1287,9 +1287,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1300,14 +1300,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h b/examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
index 2d1296058391a0685d9385047469acb41a40b765..cb41d572674bd468cfa0286e274e630dd56fdfa6 100644
--- a/examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
+++ b/examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
@@ -158,7 +158,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
-	checkCudaDevice;
 	int n = Mesh.getDimensions().x();
@@ -167,17 +167,17 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 	dim3 numBlocks2(statusGridSize ,statusGridSize);
-	checkCudaDevice;
-	checkCudaDevice;
 	/*dim3 threadsPerBlock(16, 16);
 	dim3 numBlocks(n/16 + 1 ,n/16 +1);*/
-	checkCudaDevice;
 	cout << "Solver initialized." << endl;
@@ -208,7 +208,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r
 	cout << "Hi!" << endl;
-	checkCudaDevice;
 	cout << "Hi2!" << endl;
 	while(time < finalTime)
@@ -217,30 +217,30 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r
-		checkCudaDevice;
 		time += tau;
 		cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost);
-		checkCudaDevice;
 		if(reinit != 0 /*&& time != finalTime */)
 			cout << time << endl;
-			checkCudaDevice;
-			checkCudaDevice;
-			checkCudaDevice;
-			checkCudaDevice;
@@ -276,31 +276,31 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
 	if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && Mesh.getDimensions().y())
-		tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+		tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 		Real value = cudaDofVector2[Entity.getIndex()];
 		Real a,b, tmp;
 		if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9)) */)
-			a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+			a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 		else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17)) */)
-			a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+			a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-			a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-					 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+			a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+					 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 		if( j == 0/* || (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3)) */)
-			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 		else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5))*/ )
-			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-			b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-					 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+			b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+					 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -316,7 +316,7 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
@@ -345,11 +345,11 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 	int i = threadIdx.x + blockDim.x*blockIdx.x;
 	int j = blockDim.y*blockIdx.y + threadIdx.y;
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	int gid = Entity.getIndex();
@@ -392,18 +392,18 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //	{
 //		if(cudaDofVector[Entity.getIndex()] > 0)
 //		{
-//			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1111(i,j);
 //					else
 //						setupSquare1110(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1101(i,j);
 //					else
 //						setupSquare1100(i,j);
@@ -411,16 +411,16 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //			}
 //			else
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1011(i,j);
 //					else
 //						setupSquare1010(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1001(i,j);
 //					else
 //						setupSquare1000(i,j);
@@ -429,18 +429,18 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //		}
 //		else
 //		{
-//			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0111(i,j);
 //					else
 //						setupSquare0110(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0101(i,j);
 //					else
 //						setupSquare0100(i,j);
@@ -448,16 +448,16 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
 //			}
 //			else
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0011(i,j);
 //					else
 //						setupSquare0010(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0001(i,j);
 //					else
 //						setupSquare0000(i,j);
@@ -495,7 +495,7 @@ Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: f
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
@@ -601,7 +601,7 @@ __global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double
-__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 	__shared__ double u0;
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
@@ -639,7 +639,7 @@ __global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
 // run this with one thread per block
-__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 //	printf("Hello\n");
 	if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1)
@@ -666,7 +666,7 @@ __global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, in
-__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, double tau)
+__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau)
 	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x;
 	int i = threadIdx.x + blockIdx.x*blockDim.x;
@@ -684,44 +684,44 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int
 //		if(status != 0)
-			tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
+			tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
-			tnlNeighbourGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+			tnlNeighborGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 			double value = solver->cudaDofVector2[Entity.getIndex()];
 			double xf,xb,yf,yb, grad, fu, a,b;
 			a = b = 0.0;
 			if( i == 0 /*|| (threadIdx.x == 0 && !(status & 9)) */)
-				xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
-				xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
+				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
+				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
 			else if( i == solver->Mesh.getDimensions().x() - 1 /*|| (threadIdx.x == blockDim.x - 1 && !(status & 17)) */)
-				xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()] - value;
+				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
+				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()] - value;
-				xb =  value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] - value;
+				xb =  value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
+				xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
 			if( j == 0/* || (threadIdx.y == 0 && !(status & 3))*/ )
-				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] ;
-				yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
+				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] ;
+				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
 			else if( j == solver->Mesh.getDimensions().y() - 1  /*|| (threadIdx.y == blockDim.y - 1 && !(status & 5)) */)
-				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
-				yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()] - value;
+				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
+				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()] - value;
-				yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
-				yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] - value;
+				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()];
+				yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
@@ -833,14 +833,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -852,14 +852,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -871,18 +871,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -891,9 +891,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -904,18 +904,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -924,9 +924,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -937,18 +937,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -957,9 +957,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -970,17 +970,17 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -990,9 +990,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1004,18 +1004,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -1024,9 +1024,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1037,18 +1037,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -1057,9 +1057,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1070,18 +1070,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -1090,9 +1090,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1103,17 +1103,17 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -1123,9 +1123,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1140,18 +1140,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -1160,9 +1160,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1173,18 +1173,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -1193,9 +1193,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1206,14 +1206,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1230,18 +1230,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -1250,9 +1250,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1263,18 +1263,18 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -1283,9 +1283,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1296,14 +1296,14 @@ template< typename MeshReal,
           typename Index >
 void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/examples/narrow-band/tnlNarrowBand2D_impl.h b/examples/narrow-band/tnlNarrowBand2D_impl.h
index 8248baa0949862de6ebcee797e6916c9634de72e..f1afb9ab8f7d12f1639d5a08d64edf8143f47083 100644
--- a/examples/narrow-band/tnlNarrowBand2D_impl.h
+++ b/examples/narrow-band/tnlNarrowBand2D_impl.h
@@ -93,7 +93,7 @@ template< typename MeshReal,
 bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
@@ -105,22 +105,22 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
-			neighbourEntities.refresh(Mesh,Entity.getIndex());
+			neighborEntities.refresh(Mesh,Entity.getIndex());
 				if(dofVector[this->Entity.getIndex()] > 0)
-					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -128,16 +128,16 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -146,18 +146,18 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
-					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -165,16 +165,16 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -397,29 +397,29 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real value = dofVector2[Entity.getIndex()];
 	Real a,b, tmp;
 	if( i == 0 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+				 dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 	if( j == 0 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+				 dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -466,11 +466,11 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
 //	this->Entity.setCoordinates(CoordinatesType(i,j));
 //	this->Entity.refresh();
-//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	auto neighborEntities =  Entity.getNeighborEntities();
 //	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -484,11 +484,11 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
 //	this->Entity.setCoordinates(CoordinatesType(i,j));
 //	this->Entity.refresh();
-//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	auto neighborEntities =  Entity.getNeighborEntities();
 //	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -502,15 +502,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -519,9 +519,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -534,15 +534,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -551,9 +551,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -566,15 +566,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -583,9 +583,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -598,14 +598,14 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -615,9 +615,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -631,15 +631,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -648,9 +648,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -663,15 +663,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -680,9 +680,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -695,15 +695,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -712,9 +712,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -727,14 +727,14 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -744,9 +744,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -763,15 +763,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -780,9 +780,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -795,15 +795,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -812,9 +812,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -827,11 +827,11 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -850,15 +850,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -867,9 +867,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -882,15 +882,15 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -899,9 +899,9 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -914,11 +914,11 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: s
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
index ac8a023d7d93060f63858892c0541f5eda0ee706..2b96ad58205d0578776b2b2d645eb71292c0fad4 100644
--- a/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
+++ b/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
@@ -111,10 +111,10 @@ bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: i
 	dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1);
-	checkCudaDevice;
-	checkCudaDevice;
 	return true;
@@ -139,7 +139,7 @@ bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: r
-	checkCudaDevice;
 	cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -165,41 +165,41 @@ template< typename MeshReal,
 void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
-	tnlGridEntity< tnlGrid< 3,double, tnlHost, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
 	Real value = cudaDofVector2[Entity.getIndex()];
 	Real a,b,c, tmp;
 	if( i == 0 )
-		a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+		a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+		a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-		a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0 >()] );
+		a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0,  0 >()] );
 	if( j == 0 )
-		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
-		b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0 >()] );
+		b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1,  0 >()] );
 	if( k == 0 )
-		c = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+		c = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
 	else if( k == Mesh.getDimensions().z() - 1 )
-		c = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+		c = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
-		c = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1 >()] );
+		c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  1 >()] );
 	Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c);
@@ -222,7 +222,7 @@ template< typename MeshReal,
 bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k)
-	tnlGridEntity< tnlGrid< 3,double, tnlHost, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
 	int gid = Entity.getIndex();
@@ -255,7 +255,7 @@ Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: f
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
 	int gx = 0;
@@ -474,7 +474,7 @@ __global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
 	int gy = blockDim.y*blockIdx.y + threadIdx.y;
diff --git a/examples/narrow-band/tnlNarrowBand3D_impl.h b/examples/narrow-band/tnlNarrowBand3D_impl.h
index eb446d9d2831967e9016b3eaaf326baf6751ab7c..33d7ef8cd2c3a19992760ed15dc732b6da7f42d8 100644
--- a/examples/narrow-band/tnlNarrowBand3D_impl.h
+++ b/examples/narrow-band/tnlNarrowBand3D_impl.h
@@ -237,38 +237,38 @@ void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: u
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
 	Real value = dofVector2[Entity.getIndex()];
 	Real a,b,c, tmp;
 	if( i == 0 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0>()];
+		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0>()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0>()] );
+		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0>()],
+				 dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()] );
 	if( j == 0 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0>()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0>()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()];
-		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0>()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0>()] );
+		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()],
+				 dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()] );
 	if( k == 0 )
-		c = dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1>()];
+		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()];
 	else if( k == Mesh.getDimensions().z() - 1 )
-		c = dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1>()];
+		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()];
-		c = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1>()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1>()] );
+		c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()],
+				 dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()] );
 	Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c);
diff --git a/examples/narrow-band/tnlNarrowBand_CUDA.h b/examples/narrow-band/tnlNarrowBand_CUDA.h
index 8da92f5fc570275c85ebba69113f0e59a4be7988..ca9b1da2cc6e26b14bc003532b6eea75e89d907d 100644
--- a/examples/narrow-band/tnlNarrowBand_CUDA.h
+++ b/examples/narrow-band/tnlNarrowBand_CUDA.h
@@ -17,9 +17,9 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
@@ -54,7 +54,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -138,7 +138,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -183,16 +183,16 @@ protected:
 #ifdef HAVE_CUDA
 //template<int sweep_t>
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i);
-//__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i);
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
+//__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
-__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
-__global__ void initSetupGrid1_2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
-__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, double tau);
-//__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver);
+__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
+__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
+__global__ void initSetupGrid1_2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
+__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau);
+//__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver);
diff --git a/examples/navier-stokes/CMakeLists.txt b/examples/navier-stokes/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/examples/navier-stokes/share/CMakeLists.txt b/examples/navier-stokes/share/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/examples/navier-stokes/share/examples/CMakeLists.txt b/examples/navier-stokes/share/examples/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/examples/quad-test/main.cpp b/examples/quad-test/main.cpp
index dbaf0114448391b9c80744882e9fe7cb86ab4cca..583d3c4ca8470297d320ec305a99e96f6ef547d7 100644
--- a/examples/quad-test/main.cpp
+++ b/examples/quad-test/main.cpp
@@ -34,7 +34,7 @@ int main(int argc, char* argv[]) {
 	String inputFile = parameters.getParameter <String> ("input-file");
 	File binaryFile;
-	if(! binaryFile.open(inputFile, tnlReadMode)) {
+	if(! binaryFile.open(inputFile, IOMode::read)) {
 		cerr << "I am not able to open the file " << inputFile << "." << std::endl;
 		return 1;
@@ -48,4 +48,4 @@ int main(int argc, char* argv[]) {
 	CSR <QuadDouble> quadMatrix("quad");
 	quadMatrix = doubleMatrix;
 	return EXIT_SUCCESS;
\ No newline at end of file
diff --git a/examples/transport-equation/transportEquationProblem.h b/examples/transport-equation/transportEquationProblem.h
index a9fc1482075afd51b4e5490dae2c67c3572e451e..6b0dd73f6353c826f5d1c6aee598dacb228b1c71 100644
--- a/examples/transport-equation/transportEquationProblem.h
+++ b/examples/transport-equation/transportEquationProblem.h
@@ -96,6 +96,11 @@ public PDEProblem< Mesh,
                                  DofVectorPointer& rightHandSide,
                                  MeshDependentDataPointer& meshDependentData );
+      template< typename Matrix >
+      void saveFailedLinearSystem( const Matrix& matrix,
+                                   const DofVectorType& dofs,
+                                   const DofVectorType& rightHandSide ) const;
       MeshFunctionPointer uPointer;
diff --git a/examples/transport-equation/transportEquationProblem_impl.h b/examples/transport-equation/transportEquationProblem_impl.h
index 698a7bc687dc4c3b4df49a48fef2f5a73566ecbc..f647802eef18fc9073fdc9b439aa345048d54ea2 100644
--- a/examples/transport-equation/transportEquationProblem_impl.h
+++ b/examples/transport-equation/transportEquationProblem_impl.h
@@ -223,4 +223,17 @@ assemblyLinearSystem( const RealType& time,
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename DifferentialOperator >
+    template< typename Matrix >
+transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
+saveFailedLinearSystem( const Matrix& matrix,
+                        const DofVectorType& dofs,
+                        const DofVectorType& rightHandSide ) const
 } // namespace TNL
diff --git a/install b/install
index 139fe67d3b2cd3b5bec3d973a1d336e071ea5182..6ce0b8a60e9d230b73947c65f2a8ef1e2e767fe1 100755
--- a/install
+++ b/install
@@ -35,10 +35,8 @@ then
        mkdir Debug
     cd Debug
-    if ../build --root-dir=.. --build=Debug ${OPTIONS};
+    if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}
-        make install
-    else    
        exit 1
     cd ..
@@ -51,10 +49,8 @@ then
        mkdir Release
     cd Release
-    if ../build --root-dir=.. --build=Release ${OPTIONS};
+    if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS};
-        make install
-    else
         exit 1
     cd ..
diff --git a/share/CMakeLists.txt b/share/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/share/Tools/CMakeLists.txt b/share/Tools/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9499c8dfd96ca922535eacf1358fb863762147da..75215d9ec394fea0d12748a53d9bec7bb9e227a9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,4 +1,4 @@
\ No newline at end of file
diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h
index f8f60200d0bdfdc4910334174bb7b9ecd3a63977..ce1f7d10bb05d6f192fbee4dd280d63f9f425537 100644
--- a/src/TNL/Assert.h
+++ b/src/TNL/Assert.h
@@ -2,7 +2,7 @@
                           Assert.h  -  description
     begin                : Jan 12, 2010
-    copyright            : (C) 2013 by Tomas Oberhuber
+    copyright            : (C) 2013 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
@@ -11,51 +11,370 @@
 #pragma once
- * Debugging assert
+ * The purpose of this file is to define the TNL_ASSERT_* debugging macros as
+ * shown below.
+ *
+ * If the 'NDEBUG' macro is defined, the build is considered to be optimized
+ * and all assert macros are empty. Otherwise, the conditions are checked and
+ * failures lead to the diagnostics message being printed to std::cerr and
+ * program abortion (via 'throw EXIT_FAILURE' statement).
+ *
+ * For the purpose of providing Python bindings it is possible to change the
+ * reporting behaviour by defining the TNL_THROW_ASSERTION_ERROR macro, which
+ * leads to throwing the ::TNL::Assert::AssertionError holding the error
+ * message (which is not printed in this case). The AssertionError class does
+ * not inherit from std::exception to avoid being caught by normal exception
+ * handlers, but the code for Python bindings can use it to translate it to the
+ * Python's AssertionError exception.
+ *
+ * Implemented by: Jakub Klinkovsky
-#ifndef NDEBUG
+#if defined(NDEBUG) || defined(HAVE_MIC)
+// empty macros for optimized build
+#define TNL_ASSERT_TRUE( val, msg )
+#define TNL_ASSERT_FALSE( val, msg )
+#define TNL_ASSERT_EQ( val1, val2, msg )
+#define TNL_ASSERT_NE( val1, val2, msg )
+#define TNL_ASSERT_LE( val1, val2, msg )
+#define TNL_ASSERT_LT( val1, val2, msg )
+#define TNL_ASSERT_GE( val1, val2, msg )
+#define TNL_ASSERT_GT( val1, val2, msg )
+#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )
+#else /* #ifdef NDEBUG */
+#include <sstream>
 #include <iostream>
-#include <stdlib.h>
-#include <assert.h>
+#include <stdio.h>
+#include <TNL/Devices/CudaCallable.h>
+namespace TNL {
+namespace Assert {
+// This will be used by the code for Python bindings to translate assertion
+// failures to the Python's AssertionError exception.
+class AssertionError
+    AssertionError( const std::string& msg )
+       : msg( msg )
+    {}
+    const char* what() const
+    {
+       return msg.c_str();
+    }
+    std::string msg;
+inline void
+printDiagnosticsHost( const char* assertion,
+                      const char* message,
+                      const char* file,
+                      const char* function,
+                      int line,
+                      const char* diagnostics )
+   std::stringstream str;
+   str << "Assertion '" << assertion << "' failed !!!\n"
+       << "Message: " << message << "\n"
+       << "File: " << file << "\n"
+       << "Function: " << function << "\n"
+       << "Line: " << line << "\n"
+       << "Diagnostics:\n" << diagnostics << std::endl;
+   throw AssertionError( str.str() );
+// This will be used in regular C++ code
+inline void
+printDiagnosticsHost( const char* assertion,
+                      const char* message,
+                      const char* file,
+                      const char* function,
+                      int line,
+                      const char* diagnostics )
+   std::cerr << "Assertion '" << assertion << "' failed !!!\n"
+             << "Message: " << message << "\n"
+             << "File: " << file << "\n"
+             << "Function: " << function << "\n"
+             << "Line: " << line << "\n"
+             << "Diagnostics:\n" << diagnostics << std::endl;
+inline void
+printDiagnosticsCuda( const char* assertion,
+                      const char* message,
+                      const char* file,
+                      const char* function,
+                      int line,
+                      const char* diagnostics )
+   printf( "Assertion '%s' failed !!!\n"
+           "Message: %s\n"
+           "File: %s\n"
+           "Function: %s\n"
+           "Line: %d\n"
+           "Diagnostics: %s\n",
+           assertion, message, file, function, line, diagnostics );
+inline void
+#ifdef __CUDA_ARCH__
+   // https://devtalk.nvidia.com/default/topic/509584/how-to-cancel-a-running-cuda-kernel-/
+   // TODO: it is reported as "illegal instruction", but that leads to an abort as well...
+   asm("trap;");
+   throw EXIT_FAILURE;
-#ifndef NDEBUG   
+template< typename T >
+struct Formatter
+   static std::string
+   printToString( const T& value )
+   {
+      ::std::stringstream ss;
+      ss << value;
+      return ss.str();
+   }
+struct Formatter< bool >
+   static std::string
+   printToString( const bool& value )
+   {
+      if( value ) return "true";
+      else return "false";
+   }
+template< typename T1, typename T2 >
+__cuda_callable__ void
+cmpHelperOpFailure( const char* assertion,
+                    const char* message,
+                    const char* file,
+                    const char* function,
+                    int line,
+                    const char* lhs_expression,
+                    const char* rhs_expression,
+                    const T1& lhs_value,
+                    const T2& rhs_value,
+                    const char* op )
+#ifdef __CUDA_ARCH__
+   // diagnostics is not supported - we don't have the machinery
+   // to construct the dynamic error message
+   printDiagnosticsCuda( assertion, message, file, function, line,
+                         "Not supported in CUDA kernels." );
+   const std::string formatted_lhs_value = Formatter< T1 >::printToString( lhs_value );
+   const std::string formatted_rhs_value = Formatter< T2 >::printToString( rhs_value );
+   std::stringstream str;
+   if( std::string(op) == "==" ) {
+      str << "      Expected: " << lhs_expression;
+      if( formatted_lhs_value != lhs_expression ) {
+         str << "\n      Which is: " << formatted_lhs_value;
+      }
+      str << "\nTo be equal to: " << rhs_expression;
+      if( formatted_rhs_value != rhs_expression ) {
+         str << "\n      Which is: " << formatted_rhs_value;
+      }
+      str << std::endl;
+   }
+   else {
+      str << "Expected: (" << lhs_expression << ") " << op << " (" << rhs_expression << "), "
+          << "actual: " << formatted_lhs_value << " vs " << formatted_rhs_value << std::endl;
+   }
+   printDiagnosticsHost( assertion, message, file, function, line,
+                         str.str().c_str() );
+   fatalFailure();
+template< typename T1, typename T2 >
+__cuda_callable__ void
+cmpHelperTrue( const char* assertion,
+               const char* message,
+               const char* file,
+               const char* function,
+               int line,
+               const char* expr1,
+               const char* expr2,
+               const T1& val1,
+               const T2& val2 )
+   // explicit cast is necessary, because T1::operator! might not be defined
+   if( ! (bool) val1 )
+      ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line,
+                                         expr1, "true", val1, true, "==" );
+template< typename T1, typename T2 >
+__cuda_callable__ void
+cmpHelperFalse( const char* assertion,
+                const char* message,
+                const char* file,
+                const char* function,
+                int line,
+                const char* expr1,
+                const char* expr2,
+                const T1& val1,
+                const T2& val2 )
+   if( val1 )
+      ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line,
+                                         expr1, "false", val1, false, "==" );
+// A macro for implementing the helper functions needed to implement
+// TNL_ASSERT_??. It is here just to avoid copy-and-paste of similar code.
+#define TNL_IMPL_CMP_HELPER_( op_name, op ) \
+template< typename T1, typename T2 > \
+__cuda_callable__ void \
+cmpHelper##op_name( const char* assertion, \
+                    const char* message, \
+                    const char* file, \
+                    const char* function, \
+                    int line, \
+                    const char* expr1, \
+                    const char* expr2, \
+                    const T1& val1, \
+                    const T2& val2 ) \
+   if( ! ( (val1) op (val2) ) ) \
+      ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line, \
+                                         expr1, expr2, val1, val2, #op );\
+// Implements the helper function for TNL_ASSERT_EQ
+// Implements the helper function for TNL_ASSERT_NE
+// Implements the helper function for TNL_ASSERT_LE
+// Implements the helper function for TNL_ASSERT_LT
+// Implements the helper function for TNL_ASSERT_GE
+// Implements the helper function for TNL_ASSERT_GT
+} // namespace Assert
+} // namespace TNL
+// Internal macro wrapping the __PRETTY_FUNCTION__ "magic".
 #if defined( __NVCC__ ) && ( __CUDACC_VER__ < 80000 )
-    #define TNL_PRETTY_FUNCTION "(not known in CUDA 7.5 or older)"
+    #define __TNL_PRETTY_FUNCTION "(not known in CUDA 7.5 or older)"
+// Internal macro to compose the string representing the assertion.
+// We can't do it easily at runtime, because we have to support assertions
+// in CUDA kernels, which can't use std::string objects. Instead, we do it
+// at compile time - adjacent strings are joined at the language level.
+#define __TNL_JOIN_STRINGS( val1, op, val2 ) \
+   __STRING( val1 ) " " __STRING( op ) " " __STRING( val2 )
+// Internal macro to pass all the arguments to the specified cmpHelperOP
+#define __TNL_ASSERT_PRED2( pred, op, val1, val2, msg ) \
+   pred( __TNL_JOIN_STRINGS( val1, op, val2 ), \
+         msg, __FILE__, __TNL_PRETTY_FUNCTION, __LINE__, \
+         #val1, #val2, val1, val2 )
+// Main definitions of the TNL_ASSERT_* macros
+// unary
+#define TNL_ASSERT_TRUE( val, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperTrue, ==, val, true, msg )
+#define TNL_ASSERT_FALSE( val, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperFalse, ==, val, false, msg )
+// binary
+#define TNL_ASSERT_EQ( val1, val2, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperEQ, ==, val1, val2, msg )
+#define TNL_ASSERT_NE( val1, val2, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperNE, !=, val1, val2, msg )
+#define TNL_ASSERT_LE( val1, val2, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperLE, <=, val1, val2, msg )
+#define TNL_ASSERT_LT( val1, val2, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperLT, <,  val1, val2, msg )
+#define TNL_ASSERT_GE( val1, val2, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperGE, >=, val1, val2, msg )
+#define TNL_ASSERT_GT( val1, val2, msg ) \
+   __TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperGT, >,  val1, val2, msg )
+ * Original assert macro with custom command for diagnostics.
+ */
 // __CUDA_ARCH__ is defined by the compiler only for code executed on GPU
 #ifdef __CUDA_ARCH__
-#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )                                     \
-   if( ! ( ___tnl__assert_condition ) )                                                                    \
-   {                                                                                                       \
-   printf( "Assertion '%s' failed !!! \n File: %s \n Line: %d \n Diagnostics: Not supported with CUDA.\n", \
-           __STRING( ___tnl__assert_condition ),                                                           \
-           __FILE__,                                                                                       \
-           __LINE__ );                                                                                     \
-                                                                                                           \
+#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )                                         \
+   if( ! ( ___tnl__assert_condition ) )                                                                        \
+   {                                                                                                           \
+      printf( "Assertion '%s' failed !!! \n File: %s \n Line: %d \n Diagnostics: Not supported with CUDA.\n",  \
+              __STRING( ___tnl__assert_condition ),                                                            \
+              __FILE__,                                                                                        \
+              __LINE__ );                                                                                      \
+      asm("trap;");                                                                                            \
-#else // __CUDA_ARCH__
-#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )                                  \
-   if( ! ( ___tnl__assert_condition ) )                                                                 \
-   {                                                                                                    \
-   std::cerr << "Assertion '" << __STRING( ___tnl__assert_condition ) << "' failed !!!" << std::endl    \
-             << "File: " << __FILE__ << std::endl                                                       \
-             << "Function: " << TNL_PRETTY_FUNCTION << std::endl                                        \
-             << "Line: " << __LINE__ << std::endl                                                       \
-             << "Diagnostics: ";                                                                        \
-        ___tnl__assert_command;                                                                         \
-        throw EXIT_FAILURE;                                                                             \
+#else // #ifdef __CUDA_ARCH__
+// This will be used by the code for Python bindings to translate assertion
+// failures to the Python's AssertionError exception.
+#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )                                   \
+   if( ! ( ___tnl__assert_condition ) )                                                                  \
+   {                                                                                                     \
+      std::stringstream buffer;                                                                          \
+      auto old = std::cerr.rdbuf( buffer.rdbuf() );                                                      \
+                                                                                                         \
+      std::cerr << "Assertion '" << __STRING( ___tnl__assert_condition ) << "' failed !!!" << std::endl  \
+                << "File: " << __FILE__ << std::endl                                                     \
+                << "Function: " << __PRETTY_FUNCTION__ << std::endl                                      \
+                << "Line: " << __LINE__ << std::endl                                                     \
+                << "Diagnostics: ";                                                                      \
+      ___tnl__assert_command;                                                                            \
+                                                                                                         \
+      std::string msg = buffer.str();                                                                    \
+      std::cerr.rdbuf( old );                                                                            \
+      throw ::TNL::Assert::AssertionError( msg );                                                        \
-#endif // __CUDA_ARCH__
+// This will be used in regular C++ code
+#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )                                   \
+   if( ! ( ___tnl__assert_condition ) )                                                                  \
+   {                                                                                                     \
+      std::cerr << "Assertion '" << __STRING( ___tnl__assert_condition ) << "' failed !!!" << std::endl  \
+                << "File: " << __FILE__ << std::endl                                                     \
+                << "Function: " << __TNL_PRETTY_FUNCTION << std::endl                                    \
+                << "Line: " << __LINE__ << std::endl                                                     \
+                << "Diagnostics: ";                                                                      \
+      ___tnl__assert_command;                                                                            \
+      throw EXIT_FAILURE;                                                                                \
+   }
+#endif // #ifdef __CUDA_ARCH__
-#else /* #ifndef NDEBUG */
-#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )
-#endif /* #ifndef NDEBUG */
+#endif // #ifdef NDEBUG
\ No newline at end of file
diff --git a/src/TNL/CMakeLists.txt b/src/TNL/CMakeLists.txt
old mode 100755
new mode 100644
index 957120fdac7d916dad1309131e8fb714224ee7a3..3bdfaca909298fc323cbcf493c5aabd986e60af0
--- a/src/TNL/CMakeLists.txt
+++ b/src/TNL/CMakeLists.txt
@@ -2,6 +2,7 @@ ADD_SUBDIRECTORY( Config )
 ADD_SUBDIRECTORY( Experimental )
@@ -30,6 +31,7 @@ set( headers
+     ParallelFor.h
@@ -90,6 +92,8 @@ if( BUILD_CUDA )
 else( BUILD_CUDA )
    ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED 
                 ${tnl_SOURCES} )
+  #TARGET_COMPILE_DEFINITIONS( tnl${debugExt}-${tnlVersion} PUBLIC -DHAVE_MIC )
 endif( BUILD_CUDA )                                    
 SET_TARGET_PROPERTIES( tnl${debugExt}-${tnlVersion} PROPERTIES 
diff --git a/src/TNL/Config/CMakeLists.txt b/src/TNL/Config/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Config/ParameterContainer.h b/src/TNL/Config/ParameterContainer.h
index af57fa14a4bb07d248750f69c4927b4e357aec06..5daeb07861d0844ebaf6c4ead8efa5e47b02e53e 100644
--- a/src/TNL/Config/ParameterContainer.h
+++ b/src/TNL/Config/ParameterContainer.h
@@ -128,7 +128,7 @@ setParameter( const String& name,
       if( parameters[ i ] -> name == name )
-         if( parameters[ i ] -> type == getType( value ) )
+         if( parameters[ i ] -> type == TNL::getType< T >() )
             ( ( tnlParameter< T > * ) parameters[ i ] ) -> value = value;
             return true;
@@ -137,7 +137,7 @@ setParameter( const String& name,
             std::cerr << "Parameter " << name << " already exists with different type "
                  << parameters[ i ] -> type << " not "
-                 << getType( value ) << std::endl;
+                 << TNL::getType< T >() << std::endl;
             abort( );
             return false;
diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Containers/Algorithms/ArrayOperations.h
index e32c7fd288c098ae2e648c1ad2fdbc7a6ccfc324..4861cc460262ec74cad2a35400eb472cf2e551dc 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperations.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperations.h
@@ -12,6 +12,7 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/MIC.h>
 namespace TNL {
 namespace Containers {   
@@ -27,11 +28,11 @@ class ArrayOperations< Devices::Host >
    template< typename Element, typename Index >
-   static bool allocateMemory( Element*& data,
+   static void allocateMemory( Element*& data,
                                const Index size );
    template< typename Element >
-   static bool freeMemory( Element* data );
+   static void freeMemory( Element* data );
    template< typename Element >
    static void setMemoryElement( Element* data,
@@ -73,11 +74,11 @@ class ArrayOperations< Devices::Cuda >
    template< typename Element, typename Index >
-   static bool allocateMemory( Element*& data,
+   static void allocateMemory( Element*& data,
                                const Index size );
    template< typename Element >
-   static bool freeMemory( Element* data );
+   static void freeMemory( Element* data );
    template< typename Element >
    static void setMemoryElement( Element* data,
@@ -86,6 +87,7 @@ class ArrayOperations< Devices::Cuda >
    template< typename Element >
    static Element getMemoryElement( const Element* data );
+   // TODO: does not make sense for CUDA - remove?
    template< typename Element, typename Index >
    static Element& getArrayElementReference( Element* data, const Index i );
@@ -152,9 +154,96 @@ class ArrayOperations< Devices::Host, Devices::Cuda >
                               const Index size );
+class ArrayOperations< Devices::MIC >
+   public:
+   template< typename Element, typename Index >
+   static void allocateMemory( Element*& data,
+                               const Index size );
+   template< typename Element >
+   static void freeMemory( Element* data );
+   template< typename Element >
+   static void setMemoryElement( Element* data,
+                                 const Element& value );
+   template< typename Element >
+   static Element getMemoryElement( const Element* data );
+   template< typename Element, typename Index >
+   static Element& getArrayElementReference( Element* data, const Index i );
+   template< typename Element, typename Index >
+   static const Element& getArrayElementReference( const Element* data, const Index i );
+   template< typename Element, typename Index >
+   static bool setMemory( Element* data,
+                          const Element& value,
+                          const Index size );
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool copyMemory( DestinationElement* destination,
+                           const SourceElement* source,
+                           const Index size );
+   template< typename Element1,
+             typename Element2,
+             typename Index >
+   static bool compareMemory( const Element1* destination,
+                              const Element2* source,
+                              const Index size );
+class ArrayOperations< Devices::MIC, Devices::Host >
+   public:
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool copyMemory( DestinationElement* destination,
+                           const SourceElement* source,
+                           const Index size );
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool compareMemory( const DestinationElement* destination,
+                              const SourceElement* source,
+                              const Index size );
+class ArrayOperations< Devices::Host, Devices::MIC >
+   public:
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool copyMemory( DestinationElement* destination,
+                           const SourceElement* source,
+                           const Index size );
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index >
+   static bool compareMemory( const DestinationElement* destination,
+                              const SourceElement* source,
+                              const Index size );
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
 #include <TNL/Containers/Algorithms/ArrayOperationsHost_impl.h>
 #include <TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h>
+#include <TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h>
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
index 74bf0a73005e86015b443d4c99a6274d39ab2077..1465c6250bbcd2510b8ceb50a86f7bd77fc02c6c 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
@@ -14,6 +14,8 @@
 #include <TNL/tnlConfig.h>
 #include <TNL/Math.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
 #include <TNL/Containers/Algorithms/ArrayOperations.h>
 #include <TNL/Containers/Algorithms/Reduction.h>
 #include <TNL/Containers/Algorithms/reduction-operations.h>
@@ -23,36 +25,37 @@ namespace Containers {
 namespace Algorithms {
 template< typename Element, typename Index >
 ArrayOperations< Devices::Cuda >::
 allocateMemory( Element*& data,
                 const Index size )
 #ifdef HAVE_CUDA
-   checkCudaDevice;
    if( cudaMalloc( ( void** ) &data,
                    ( size_t ) size * sizeof( Element ) ) != cudaSuccess )
+   {
       data = 0;
-   return checkCudaDevice;
+      throw Exceptions::CudaBadAlloc();
+   }
-   CudaSupportMissingMessage;
-   return false;
+   throw Exceptions::CudaSupportMissing();
 template< typename Element >
 ArrayOperations< Devices::Cuda >::
 freeMemory( Element* data )
-   TNL_ASSERT( data, );
+   TNL_ASSERT_TRUE( data, "Attempted to free a nullptr." );
 #ifdef HAVE_CUDA
-      checkCudaDevice;
-      cudaFree( data );
-      return checkCudaDevice;
+   cudaFree( data );
-      CudaSupportMissingMessage;;
-   return true;
+   throw Exceptions::CudaSupportMissing();
@@ -62,7 +65,7 @@ ArrayOperations< Devices::Cuda >::
 setMemoryElement( Element* data,
                   const Element& value )
-   TNL_ASSERT( data, );
+   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
    ArrayOperations< Devices::Cuda >::setMemory( data, value, 1 );
@@ -71,7 +74,7 @@ Element
 ArrayOperations< Devices::Cuda >::
 getMemoryElement( const Element* data )
-   TNL_ASSERT( data, );
+   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
    Element result;
    ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< Element, Element, int >( &result, data, 1 );
    return result;
@@ -82,7 +85,7 @@ Element&
 ArrayOperations< Devices::Cuda >::
 getArrayElementReference( Element* data, const Index i )
-   TNL_ASSERT( data, );
+   TNL_ASSERT_TRUE( data, "Attempted to access data through a nullptr." );
    return data[ i ];
@@ -91,7 +94,7 @@ const
 Element& ArrayOperations< Devices::Cuda >::
 getArrayElementReference( const Element* data, const Index i )
-   TNL_ASSERT( data, );
+   TNL_ASSERT_TRUE( data, "Attempted to access data through a nullptr." );
    return data[ i ];
@@ -120,17 +123,16 @@ setMemory( Element* data,
            const Element& value,
            const Index size )
-   TNL_ASSERT( data, );
+   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
 #ifdef HAVE_CUDA
    dim3 blockSize( 0 ), gridSize( 0 );
    blockSize. x = 256;
    Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
    gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
    setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );
-   return checkCudaDevice;
-   CudaSupportMissingMessage;;
-   return false;
+   throw Exceptions::CudaSupportMissing();
@@ -162,30 +164,29 @@ copyMemory( DestinationElement* destination,
             const SourceElement* source,
             const Index size )
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   #ifdef HAVE_CUDA
-      if( std::is_same< DestinationElement, SourceElement >::value )
-      {
-         if( cudaMemcpy( destination,
-                         source,
-                         size * sizeof( DestinationElement ),
-                         cudaMemcpyDeviceToDevice ) != cudaSuccess )
-         return checkCudaDevice;
-      }
-      else
-      {
-         dim3 blockSize( 0 ), gridSize( 0 );
-         blockSize. x = 256;
-         Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
-         gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
-         copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
-         return checkCudaDevice;
-      }
-   #else
-      CudaSupportMissingMessage;;
-   #endif
-      return false;
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+#ifdef HAVE_CUDA
+   if( std::is_same< DestinationElement, SourceElement >::value )
+   {
+      cudaMemcpy( destination,
+                  source,
+                  size * sizeof( DestinationElement ),
+                  cudaMemcpyDeviceToDevice );
+      return TNL_CHECK_CUDA_DEVICE;
+   }
+   else
+   {
+      dim3 blockSize( 0 ), gridSize( 0 );
+      blockSize. x = 256;
+      Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
+      gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
+      copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
+      return TNL_CHECK_CUDA_DEVICE;
+   }
+   throw Exceptions::CudaSupportMissing();
 template< typename Element1,
@@ -197,8 +198,8 @@ compareMemory( const Element1* destination,
                const Element2* source,
                const Index size )
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
+   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    //TODO: The parallel reduction on the CUDA device with different element types is needed.
    bool result;
    Algorithms::tnlParallelReductionEqualities< Element1, Index > reductionEqualities;
@@ -219,30 +220,21 @@ copyMemory( DestinationElement* destination,
             const SourceElement* source,
             const Index size )
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   #ifdef HAVE_CUDA
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+#ifdef HAVE_CUDA
    if( std::is_same< DestinationElement, SourceElement >::value )
-      cudaMemcpy( destination,
-                  source,
-                  size * sizeof( DestinationElement ),
-                  cudaMemcpyDeviceToHost );
-      if( ! checkCudaDevice )
-      {
+      if( cudaMemcpy( destination,
+                      source,
+                      size * sizeof( DestinationElement ),
+                      cudaMemcpyDeviceToHost ) != cudaSuccess )
          std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-         return false;
-      }
-      return true;
+      return TNL_CHECK_CUDA_DEVICE;
       SourceElement* buffer = new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ];
-      if( ! buffer )
-      {
-         std::cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." << std::endl;
-         return false;
-      }
       Index i( 0 );
       while( i < size )
@@ -251,9 +243,9 @@ copyMemory( DestinationElement* destination,
                          min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
-            checkCudaDevice;
             delete[] buffer;
-            return false;
+            std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
+            return TNL_CHECK_CUDA_DEVICE;
          Index j( 0 );
          while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
@@ -265,11 +257,10 @@ copyMemory( DestinationElement* destination,
       delete[] buffer;
-   #else
-      CudaSupportMissingMessage;;
-      return false;
-   #endif
    return true;
+   throw Exceptions::CudaSupportMissing();
@@ -285,16 +276,11 @@ compareMemory( const Element1* destination,
     * Here, destination is on host and source is on CUDA device.
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
-   #ifdef HAVE_CUDA
+   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
+#ifdef HAVE_CUDA
    Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ];
-   if( ! host_buffer )
-   {
-      std::cerr << "I am sorry but I cannot allocate supporting buffer on the host for comparing data between CUDA GPU and CPU." << std::endl;
-      return false;
-   }
    Index compared( 0 );
    while( compared < size )
@@ -304,10 +290,9 @@ compareMemory( const Element1* destination,
                       transfer * sizeof( Element2 ),
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
-         std::cerr << "Transfer of data from the device failed." << std::endl;
-         checkCudaDevice;
          delete[] host_buffer;
-         return false;
+         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
+         return TNL_CHECK_CUDA_DEVICE;
       if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer, transfer ) )
@@ -318,10 +303,9 @@ compareMemory( const Element1* destination,
    delete[] host_buffer;
    return true;
-   #else
-      CudaSupportMissingMessage;;
-      return false;
-   #endif
+   throw Exceptions::CudaSupportMissing();
@@ -336,31 +320,22 @@ copyMemory( DestinationElement* destination,
             const SourceElement* source,
             const Index size )
-   TNL_ASSERT( destination, );
-   TNL_ASSERT( source, );
-   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
-   #ifdef HAVE_CUDA
+   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
+   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
+   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
+#ifdef HAVE_CUDA
    if( std::is_same< DestinationElement, SourceElement >::value )
-      cudaMemcpy( destination,
-                  source,
-                  size * sizeof( DestinationElement ),
-                  cudaMemcpyHostToDevice );
-      if( ! checkCudaDevice )
-      {
+      if( cudaMemcpy( destination,
+                      source,
+                      size * sizeof( DestinationElement ),
+                      cudaMemcpyHostToDevice ) != cudaSuccess )
          std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
-         return false;
-      }
-      return true;
+      return TNL_CHECK_CUDA_DEVICE;
       DestinationElement* buffer = new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ];
-      if( ! buffer )
-      {
-         std::cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." << std::endl;
-         return false;
-      }
       Index i( 0 );
       while( i < size )
@@ -375,19 +350,18 @@ copyMemory( DestinationElement* destination,
                          j * sizeof( DestinationElement ),
                          cudaMemcpyHostToDevice ) != cudaSuccess )
-            checkCudaDevice;
             delete[] buffer;
-            return false;
+            std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
+            return TNL_CHECK_CUDA_DEVICE;
          i += j;
       delete[] buffer;
       return true;
-   #else
-      CudaSupportMissingMessage;;
-      return false;
-   #endif
+   throw Exceptions::CudaSupportMissing();
 template< typename Element1,
@@ -399,9 +373,9 @@ compareMemory( const Element1* hostData,
                const Element2* deviceData,
                const Index size )
-   TNL_ASSERT( hostData, );
-   TNL_ASSERT( deviceData, );
-   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
+   TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." );
+   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
    return ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory( deviceData, hostData, size );
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h
index 8d6df347765c636f67bd6122801fa26971670501..7390cfdb0e317a036f45c1db7b58d09a18afac0f 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h
@@ -21,24 +21,27 @@ namespace Containers {
 namespace Algorithms {
 template< typename Element, typename Index >
 ArrayOperations< Devices::Host >::
 allocateMemory( Element*& data,
                 const Index size )
-   if( ! ( data = new Element[ size ] ) )
-      return false;
-   return true;
+   data = new Element[ size ];
+   // According to the standard, new either throws, or returns non-nullptr.
+   // Some (old) compilers don't comply:
+   // https://stackoverflow.com/questions/550451/will-new-return-null-in-any-case
+   TNL_ASSERT_TRUE( data, "Operator 'new' returned a nullptr. This should never happen - there is "
+                          "either a bug or the compiler does not comply to the standard." );
 template< typename Element >
 ArrayOperations< Devices::Host >::
 freeMemory( Element* data )
    delete[] data;
-   return true;
 template< typename Element >
 ArrayOperations< Devices::Host >::
@@ -95,7 +98,9 @@ copyMemory( DestinationElement* destination,
             const SourceElement* source,
             const Index size )
-   if( std::is_same< DestinationElement, SourceElement >::value )
+   if( std::is_same< DestinationElement, SourceElement >::value &&
+       ( std::is_fundamental< DestinationElement >::value ||
+         std::is_pointer< DestinationElement >::value ) )
       memcpy( destination, source, size * sizeof( DestinationElement ) );
       for( Index i = 0; i < size; i ++ )
@@ -112,7 +117,9 @@ compareMemory( const DestinationElement* destination,
                const SourceElement* source,
                const Index size )
-   if( std::is_same< DestinationElement, SourceElement >::value )
+   if( std::is_same< DestinationElement, SourceElement >::value &&
+       ( std::is_fundamental< DestinationElement >::value ||
+         std::is_pointer< DestinationElement >::value ) )
       if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 )
          return false;
diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..754be26878ae64076ec52c61cd793ae132fd12b6
--- /dev/null
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h
@@ -0,0 +1,444 @@
+                          ArrayOperationsMIC_impl.h  -  description
+                             -------------------
+    begin                : Mar 4, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Vit Hanousek
+#pragma once
+#include <iostream>
+#include <TNL/tnlConfig.h>
+#include <TNL/Math.h>
+#include <TNL/Exceptions/MICSupportMissing.h>
+#include <TNL/Exceptions/MICBadAlloc.h>
+#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Containers/Algorithms/reduction-operations.h>
+namespace TNL {
+namespace Containers {
+namespace Algorithms {
+static constexpr std::size_t MIC_STACK_VAR_LIM = 5*1024*1024;
+template< typename Element, typename Index >
+ArrayOperations< Devices::MIC >::
+allocateMemory( Element*& data,
+                const Index size )
+#ifdef HAVE_MIC
+   data = (Element*) Devices::MIC::AllocMIC( size * sizeof(Element) );
+   if( ! data )
+      throw Exceptions::MICBadAlloc();
+   throw Exceptions::MICSupportMissing();
+template< typename Element >
+ArrayOperations< Devices::MIC >::
+freeMemory( Element* data )
+   TNL_ASSERT( data, );
+#ifdef HAVE_MIC
+   Devices::MIC::FreeMIC( data );
+   throw Exceptions::MICSupportMissing();
+template< typename Element >
+ArrayOperations< Devices::MIC >::
+setMemoryElement( Element* data,
+                  const Element& value )
+   TNL_ASSERT( data, );
+   ArrayOperations< Devices::MIC >::setMemory( data, value, 1 );
+template< typename Element >
+ArrayOperations< Devices::MIC >::
+getMemoryElement( const Element* data )
+   TNL_ASSERT( data, );
+   Element result;
+   ArrayOperations< Devices::Host, Devices::MIC >::copyMemory< Element, Element, int >( &result, data, 1 );
+   return result;
+template< typename Element, typename Index >
+ArrayOperations< Devices::MIC >::
+getArrayElementReference( Element* data, const Index i )
+   TNL_ASSERT( data, );
+   return data[ i ];
+template< typename Element, typename Index >
+Element& ArrayOperations< Devices::MIC >::
+getArrayElementReference( const Element* data, const Index i )
+   TNL_ASSERT( data, );
+   return data[ i ];
+template< typename Element, typename Index >
+ArrayOperations< Devices::MIC >::
+setMemory( Element* data,
+           const Element& value,
+           const Index size )
+   TNL_ASSERT( data, );
+#ifdef HAVE_MIC
+   Element tmp=value;
+   Devices::MICHider<Element> hide_ptr;
+   hide_ptr.pointer=data;
+   #pragma offload target(mic) in(hide_ptr,tmp,size)
+   {
+       Element * dst= hide_ptr.pointer;
+       for(int i=0;i<size;i++)
+           dst[i]=tmp;
+   }
+   return true;
+   throw Exceptions::MICSupportMissing();
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+ArrayOperations< Devices::MIC >::
+copyMemory( DestinationElement* destination,
+            const SourceElement* source,
+            const Index size )
+   TNL_ASSERT( destination, );
+   TNL_ASSERT( source, );
+   #ifdef HAVE_MIC
+      if( std::is_same< DestinationElement, SourceElement >::value )
+      {
+         Devices::MICHider<void> src_ptr;
+         src_ptr.pointer=(void*)source;
+         Devices::MICHider<void> dst_ptr;
+         dst_ptr.pointer=(void*)destination;
+         #pragma offload target(mic) in(src_ptr,dst_ptr,size)
+         {
+             memcpy(dst_ptr.pointer,src_ptr.pointer,size*sizeof(DestinationElement));
+         }
+         return true;
+      }
+      else
+      {
+         Devices::MICHider<const SourceElement> src_ptr;
+         src_ptr.pointer=source;
+         Devices::MICHider<DestinationElement> dst_ptr;
+         dst_ptr.pointer=destination;
+         #pragma offload target(mic) in(src_ptr,dst_ptr,size)
+         {
+             for(int i=0;i<size;i++)
+                 dst_ptr.pointer[i]=src_ptr.pointer[i];
+         }
+         return true;
+      }
+   #else
+      throw Exceptions::MICSupportMissing();
+   #endif
+      return false;
+template< typename Element1,
+          typename Element2,
+          typename Index >
+ArrayOperations< Devices::MIC >::
+compareMemory( const Element1* destination,
+               const Element2* source,
+               const Index size )
+   TNL_ASSERT( destination, );
+   TNL_ASSERT( source, );
+#ifdef HAVE_MIC
+   if( std::is_same< Element1, Element2 >::value )
+   {
+      Devices::MICHider<void> src_ptr;
+      src_ptr.pointer=(void*)source;
+      Devices::MICHider<void> dst_ptr;
+      dst_ptr.pointer=(void*)destination;
+      int ret=0;
+      #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret)
+      {
+          ret=memcmp(dst_ptr.pointer,src_ptr.pointer,size*sizeof(Element1));
+      }
+      if(ret==0)
+          return true;
+   }
+   else
+   {
+      Devices::MICHider<const Element1> src_ptr;
+      src_ptr.pointer=source;
+      Devices::MICHider<const Element2> dst_ptr;
+      dst_ptr.pointer=destination;
+      bool ret=false;
+      #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret)
+      {
+          int i=0;
+          for(i=0;i<size;i++)
+              if(dst_ptr.pointer[i]!=src_ptr.pointer[i])
+                  break;
+          if(i==size)
+              ret=true;
+          else
+              ret=false;
+      }
+      return ret;
+   }
+   return false;
+   throw Exceptions::MICSupportMissing();
+ * Operations MIC -> Host
+ */
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+ArrayOperations< Devices::Host, Devices::MIC >::
+copyMemory( DestinationElement* destination,
+            const SourceElement* source,
+            const Index size )
+   TNL_ASSERT( destination, );
+   TNL_ASSERT( source, );
+#ifdef HAVE_MIC
+   if( std::is_same< DestinationElement, SourceElement >::value )
+   {
+      Devices::MICHider<void> src_ptr;
+      src_ptr.pointer=(void*)source;
+      //JAKA KONSTANTA se vejde do stacku 5MB?
+      if(size<MIC_STACK_VAR_LIM)
+      {
+         uint8_t tmp[size*sizeof(SourceElement)];
+         #pragma offload target(mic) in(src_ptr,size) out(tmp)
+         {
+              memcpy((void*)&tmp,src_ptr.pointer,size*sizeof(SourceElement));
+         }
+         memcpy((void*)destination,(void*)&tmp,size*sizeof(SourceElement));
+         return true;
+      }
+      else
+      {
+          //direct -- pomalejší
+          uint8_t* tmp=(uint8_t*)destination;
+          #pragma offload target(mic) in(src_ptr,size) out(tmp:length(size))
+          {
+              memcpy((void*)tmp,src_ptr.pointer,size*sizeof(SourceElement));
+          }
+          return true;
+      }
+   }
+   else
+   {
+      Devices::MICHider<const SourceElement> src_ptr;
+      src_ptr.pointer=source;
+      if(size<MIC_STACK_VAR_LIM)
+      {
+         uint8_t tmp[size*sizeof(DestinationElement)];
+         #pragma offload target(mic) in(src_ptr,size) out(tmp)
+         {
+              DestinationElement *dst=(DestinationElement*)&tmp;
+              for(int i=0;i<size;i++)
+                  dst[i]=src_ptr.pointer[i];
+         }
+         memcpy((void*)destination,(void*)&tmp,size*sizeof(DestinationElement));
+         return true;
+      }
+      else
+      {
+          //direct pseudo heap-- pomalejší
+          uint8_t* tmp=(uint8_t*)destination;
+          #pragma offload target(mic) in(src_ptr,size) out(tmp:length(size*sizeof(DestinationElement)))
+          {
+              DestinationElement *dst=(DestinationElement*)tmp;
+              for(int i=0;i<size;i++)
+                  dst[i]=src_ptr.pointer[i];
+          }
+          return true;
+      }
+   }
+   return false;
+   throw Exceptions::MICSupportMissing();
+template< typename Element1,
+          typename Element2,
+          typename Index >
+ArrayOperations< Devices::Host, Devices::MIC >::
+compareMemory( const Element1* destination,
+               const Element2* source,
+               const Index size )
+   /***
+    * Here, destination is on host and source is on MIC device.
+    */
+   TNL_ASSERT( destination, );
+   TNL_ASSERT( source, );
+   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
+#ifdef HAVE_MIC
+   Index compared( 0 );
+   Index transfer( 0 );
+   std::size_t max_transfer=MIC_STACK_VAR_LIM/sizeof(Element2);
+   uint8_t host_buffer[max_transfer*sizeof(Element2)];
+   Devices::MICHider<const Element2> src_ptr;
+   while( compared < size )
+   {
+     transfer=min(size-compared,max_transfer);
+     src_ptr.pointer=source+compared;
+     #pragma offload target(mic) out(host_buffer) in(src_ptr,transfer)
+     {
+         memcpy((void*)&host_buffer,(void*)src_ptr.pointer,transfer*sizeof(Element2));
+     }
+     if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], (Element2*)&host_buffer, transfer ) )
+     {
+        return false;
+     }
+     compared += transfer;
+   }
+   return true;
+   throw Exceptions::MICSupportMissing();
+ * Operations Host -> MIC
+ */
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+ArrayOperations< Devices::MIC, Devices::Host >::
+copyMemory( DestinationElement* destination,
+            const SourceElement* source,
+            const Index size )
+   TNL_ASSERT( destination, );
+   TNL_ASSERT( source, );
+   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
+#ifdef HAVE_MIC
+   if( std::is_same< DestinationElement, SourceElement >::value )
+   {
+      Devices::MICHider<void> dst_ptr;
+      dst_ptr.pointer=(void*)destination;
+      //JAKA KONSTANTA se vejde do stacku 5MB?
+      if(size<MIC_STACK_VAR_LIM)
+      {
+         uint8_t tmp[size*sizeof(SourceElement)];
+         memcpy((void*)&tmp,(void*)source,size*sizeof(SourceElement));
+         #pragma offload target(mic) in(dst_ptr,tmp,size)
+         {
+              memcpy(dst_ptr.pointer,(void*)&tmp,size*sizeof(SourceElement));
+         }
+         return true;
+      }
+      else
+      {
+          //direct pseudo heap-- pomalejší
+          uint8_t* tmp=(uint8_t*)source;
+          #pragma offload target(mic) in(dst_ptr,size) in(tmp:length(size))
+          {
+              memcpy(dst_ptr.pointer,(void*)tmp,size*sizeof(SourceElement));
+          }
+          return true;
+      }
+   }
+   else
+   {
+      Devices::MICHider<DestinationElement> dst_ptr;
+      dst_ptr.pointer=destination;
+      if(size<MIC_STACK_VAR_LIM)
+      {
+         uint8_t tmp[size*sizeof(SourceElement)];
+         memcpy((void*)&tmp,(void*)source,size*sizeof(SourceElement));
+         #pragma offload target(mic) in(dst_ptr,size,tmp)
+         {
+              SourceElement *src=(SourceElement*)&tmp;
+              for(int i=0;i<size;i++)
+                  dst_ptr.pointer[i]=src[i];
+         }
+         return true;
+      }
+      else
+      {
+          //direct pseudo heap-- pomalejší
+          uint8_t* tmp=(uint8_t*)source;
+          #pragma offload target(mic) in(dst_ptr,size) in(tmp:length(size*sizeof(SourceElement)))
+          {
+              SourceElement *src=(SourceElement*)tmp;
+              for(int i=0;i<size;i++)
+                  dst_ptr.pointer[i]=src[i];
+          }
+          return true;
+      }
+   }
+   return false;
+   throw Exceptions::MICSupportMissing();
+template< typename Element1,
+          typename Element2,
+          typename Index >
+ArrayOperations< Devices::MIC, Devices::Host >::
+compareMemory( const Element1* hostData,
+               const Element2* deviceData,
+               const Index size )
+   TNL_ASSERT( hostData, );
+   TNL_ASSERT( deviceData, );
+   TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
+   return ArrayOperations< Devices::Host, Devices::MIC >::compareMemory( deviceData, hostData, size );
+} // namespace Algorithms
+} // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/CMakeLists.txt b/src/TNL/Containers/Algorithms/CMakeLists.txt
old mode 100755
new mode 100644
index 4b2744aced7e565692ee3a2469580fff1f7186a8..89fbb0368766b45a0c9aca161c44ef29bce5db47
--- a/src/TNL/Containers/Algorithms/CMakeLists.txt
+++ b/src/TNL/Containers/Algorithms/CMakeLists.txt
@@ -3,6 +3,7 @@ ADD_SUBDIRECTORY( TemplateExplicitInstantiation )
 set( headers ArrayOperations.h
+             ArrayOperationsMIC_impl.h
@@ -17,6 +18,7 @@ set( headers ArrayOperations.h
+             VectorOperationsMIC_impl.h
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/TNL/Containers/Algorithms )
diff --git a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h b/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h
index 5ec18f3c13a5227fe081d8f0757dd4e62b8e994a..bc12d3030a17d95280c949a11f738c61152ed05c 100644
--- a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h
+++ b/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h
@@ -32,6 +32,9 @@ namespace Algorithms {
  * architecture so that there are no local memory spills.
 static constexpr int Multireduction_maxThreadsPerBlock = 256;  // must be a power of 2
+static constexpr int Multireduction_registersPerThread = 38;   // empirically determined optimal value
+// __CUDA_ARCH__ is defined only in device code!
 #if (__CUDA_ARCH__ >= 300 )
    static constexpr int Multireduction_minBlocksPerMultiprocessor = 6;
@@ -187,12 +190,14 @@ CudaMultireductionKernelLauncher( Operation& operation,
    // we run the kernel with a fixed number of blocks, so the amount of work per
    // block increases with enlarging the problem, so even small imbalance can
    // cost us dearly.
-   // On Tesla K40c, desGridSizeX = 4 * 6 * 15 = 360.
-//   const IndexType desGridSizeX = 4 * Multireduction_minBlocksPerMultiprocessor
-//                                    * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-   // On Tesla K40c, desGridSizeX = 6 * 15 = 90.
-   const IndexType desGridSizeX = Multireduction_minBlocksPerMultiprocessor
-                                * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+   // Therefore,  desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors
+   // where blocksPerMultiprocessor is determined according to the number of
+   // available registers on the multiprocessor.
+   // On Tesla K40c, desGridSize = 8 * 15 = 120.
+   const int activeDevice = Devices::CudaDeviceInfo::getActiveDevice();
+   const int blocksdPerMultiprocessor = Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice )
+                                      / ( Multireduction_maxThreadsPerBlock * Multireduction_registersPerThread );
+   const int desGridSizeX = blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice );
    dim3 blockSize, gridSize;
    // version A: max 16 rows of threads
@@ -230,8 +235,7 @@ CudaMultireductionKernelLauncher( Operation& operation,
    // (make an overestimate to avoid reallocation on every call if n is increased by 1 each time)
    const size_t buf_size = 8 * ( n / 8 + 1 ) * desGridSizeX * sizeof( ResultType );
    CudaReductionBuffer& cudaReductionBuffer = CudaReductionBuffer::getInstance();
-   if( ! cudaReductionBuffer.setSize( buf_size ) )
-      throw 1;
+   cudaReductionBuffer.setSize( buf_size );
    output = cudaReductionBuffer.template getData< ResultType >();
    // when there is only one warp per blockSize.x, we need to allocate two warps
@@ -304,7 +308,7 @@ CudaMultireductionKernelLauncher( Operation& operation,
          TNL_ASSERT( false, std::cerr << "Block size is " << blockSize.x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." << std::endl );
-   checkCudaDevice;
    // return the size of the output array on the CUDA device
    return gridSize.x;
diff --git a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h b/src/TNL/Containers/Algorithms/CudaReductionBuffer.h
index 3b7a2150267b2a98c845221bd54d95a9ae509fff..2897c7280a6bc61f9b60a9cb3c7b44a94ad20de3 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h
+++ b/src/TNL/Containers/Algorithms/CudaReductionBuffer.h
@@ -8,10 +8,15 @@
 /* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
 #pragma once
 #include <stdlib.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 namespace TNL {
 namespace Containers {
@@ -20,35 +25,34 @@ namespace Algorithms {
 class CudaReductionBuffer
-      inline static CudaReductionBuffer& getInstance( size_t size = 0 )
+      inline static CudaReductionBuffer& getInstance()
-         static CudaReductionBuffer instance( size );
+         static CudaReductionBuffer instance;
          return instance;
-      inline bool setSize( size_t size )
+      inline void setSize( size_t size )
 #ifdef HAVE_CUDA
          if( size > this->size )
-            if( data ) cudaFree( data );
-            this->size = size;
-            if( cudaMalloc( ( void** ) &this->data, size ) != cudaSuccess )
-            {
-               std::cerr << "I am not able to allocate reduction buffer on the GPU." << std::endl;
+            this->free();
+            if( cudaMalloc( ( void** ) &this->data, size ) != cudaSuccess ) {
                this->data = 0;
+               throw Exceptions::CudaBadAlloc();
-            return checkCudaDevice;
+            this->size = size;
-         else
-            return true;
-         return false;
+         throw Exceptions::CudaSupportMissing();
       template< typename Type >
-      Type* getData() { return ( Type* ) this->data; }
+      Type* getData()
+      {
+         return ( Type* ) this->data;
+      }
       // stop the compiler generating methods of copy the object
@@ -56,10 +60,10 @@ class CudaReductionBuffer
       CudaReductionBuffer& operator=( CudaReductionBuffer const& copy ); // Not Implemented
       // private constructor of the singleton
-      inline CudaReductionBuffer( size_t size = 0 ): data( 0 ), size( 0 )
+      inline CudaReductionBuffer( size_t size = 0 )
 #ifdef HAVE_CUDA
-         if( size != 0 ) setSize( size );
+         setSize( size );
          atexit( CudaReductionBuffer::free_atexit );
@@ -76,17 +80,17 @@ class CudaReductionBuffer
          if( data )
             cudaFree( data );
-            data = 0;
+            data = nullptr;
-      void* data;
+      void* data = nullptr;
-      size_t size;
+      size_t size = 0;
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
index 98a8841a36917cbb420dd54ef25c619a699853fb..a456dad4fa25dacc41f55b79d8fcf6791c5dfd35 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
@@ -30,6 +30,9 @@ namespace Algorithms {
  * architecture so that there are no local memory spills.
 static constexpr int Reduction_maxThreadsPerBlock = 256;  // must be a power of 2
+static constexpr int Reduction_registersPerThread = 32;   // empirically determined optimal value
+// __CUDA_ARCH__ is defined only in device code!
 #if (__CUDA_ARCH__ >= 300 )
    static constexpr int Reduction_minBlocksPerMultiprocessor = 8;
@@ -189,20 +192,22 @@ CudaReductionKernelLauncher( Operation& operation,
    // we run the kernel with a fixed number of blocks, so the amount of work per
    // block increases with enlarging the problem, so even small imbalance can
    // cost us dearly.
-   // On Tesla K40c, desGridSize = 4 * 6 * 15 = 360.
-//   const IndexType desGridSize = 4 * Reduction_minBlocksPerMultiprocessor
-//                                   * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-   // On Tesla K40c, desGridSize = 6 * 15 = 90.
-   const IndexType desGridSize = Reduction_minBlocksPerMultiprocessor
-                               * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
-   dim3 blockSize( 256 ), gridSize( 0 );
+   // Therefore,  desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors
+   // where blocksPerMultiprocessor is determined according to the number of
+   // available registers on the multiprocessor.
+   // On Tesla K40c, desGridSize = 8 * 15 = 120.
+   const int activeDevice = Devices::CudaDeviceInfo::getActiveDevice();
+   const int blocksdPerMultiprocessor = Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice )
+                                      / ( Reduction_maxThreadsPerBlock * Reduction_registersPerThread );
+   const int desGridSize = blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice );
+   dim3 blockSize, gridSize;
+   blockSize.x = Reduction_maxThreadsPerBlock;
    gridSize.x = min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
    // create reference to the reduction buffer singleton and set size
    const size_t buf_size = desGridSize * sizeof( ResultType );
    CudaReductionBuffer& cudaReductionBuffer = CudaReductionBuffer::getInstance();
-   if( ! cudaReductionBuffer.setSize( buf_size ) )
-      throw 1;
+   cudaReductionBuffer.setSize( buf_size );
    output = cudaReductionBuffer.template getData< ResultType >();
    // when there is only one warp per blockSize.x, we need to allocate two warps
@@ -273,7 +278,7 @@ CudaReductionKernelLauncher( Operation& operation,
          TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
-   checkCudaDevice;
    // return the size of the output array on the CUDA device
    return gridSize.x;
diff --git a/src/TNL/Containers/Algorithms/Multireduction.h b/src/TNL/Containers/Algorithms/Multireduction.h
index 9087db93ba6bfded63438c2e59175cc80794042c..8aa314e754d9657cef49d09c42d0bd544a2848b7 100644
--- a/src/TNL/Containers/Algorithms/Multireduction.h
+++ b/src/TNL/Containers/Algorithms/Multireduction.h
@@ -54,6 +54,21 @@ public:
            typename Operation::ResultType* hostResult );
+class Multireduction< Devices::MIC >
+   template< typename Operation >
+   static bool
+   reduce( Operation& operation,
+           int n,
+           const typename Operation::IndexType size,
+           const typename Operation::RealType* deviceInput1,
+           const typename Operation::IndexType ldInput1,
+           const typename Operation::RealType* deviceInput2,
+           typename Operation::ResultType* hostResult );
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/Multireduction_impl.h b/src/TNL/Containers/Algorithms/Multireduction_impl.h
index 01db7844985447ac883020dde9478987c4d3b2a9..e5433f045e90c1ce5cd2f4d090b5a2e5a863464c 100644
--- a/src/TNL/Containers/Algorithms/Multireduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Multireduction_impl.h
@@ -17,6 +17,7 @@
 #include <TNL/Assert.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Containers/Algorithms/reduction-operations.h>
 #include <TNL/Containers/Algorithms/ArrayOperations.h>
 #include <TNL/Containers/Algorithms/CudaMultireductionKernel.h>
@@ -59,8 +60,8 @@ reduce( Operation& operation,
         typename Operation::ResultType* hostResult )
 #ifdef HAVE_CUDA
-   TNL_ASSERT( n > 0, );
-   TNL_ASSERT( size <= ldInput1, );
+   TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." );
+   TNL_ASSERT_LE( size, ldInput1, "The size of the input cannot exceed its leading dimension." );
    typedef typename Operation::IndexType IndexType;
    typedef typename Operation::RealType RealType;
@@ -143,10 +144,9 @@ reduce( Operation& operation,
       std::cout << "   Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
-   return checkCudaDevice;
-   CudaSupportMissingMessage;
-   return false;
+   throw Exceptions::CudaSupportMissing();
@@ -171,8 +171,8 @@ reduce( Operation& operation,
         const typename Operation::RealType* input2,
         typename Operation::ResultType* result )
-   TNL_ASSERT( n > 0, );
-   TNL_ASSERT( size <= ldInput1, );
+   TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." );
+   TNL_ASSERT_LE( size, ldInput1, "The size of the input cannot exceed its leading dimension." );
    typedef typename Operation::IndexType IndexType;
    typedef typename Operation::RealType RealType;
@@ -250,6 +250,30 @@ reduce( Operation& operation,
    return true;
+template< typename Operation >
+Multireduction< Devices::MIC >::
+reduce( Operation& operation,
+        int n,
+        const typename Operation::IndexType size,
+        const typename Operation::RealType* input1,
+        const typename Operation::IndexType ldInput1,
+        const typename Operation::RealType* input2,
+        typename Operation::ResultType* result )
+   TNL_ASSERT( n > 0, );
+   TNL_ASSERT( size <= ldInput1, );
+   typedef typename Operation::IndexType IndexType;
+   typedef typename Operation::RealType RealType;
+   typedef typename Operation::ResultType ResultType;
+   std::cout << "Not Implemented yet Multireduction< Devices::MIC >::reduce" << std::endl;
+   return true;
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h
index cd4d636c824481c56c56e30fb6f285045df0fe7c..e684b44ac5393adf2348b3a6fed45ea8bba57370 100644
--- a/src/TNL/Containers/Algorithms/Reduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Reduction_impl.h
@@ -15,6 +15,7 @@
 #include <TNL/Assert.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Containers/Algorithms/reduction-operations.h>
 #include <TNL/Containers/Algorithms/ArrayOperations.h>
 #include <TNL/Containers/Algorithms/CudaReductionKernel.h>
@@ -51,11 +52,19 @@ reductionOnCudaDevice( Operation& operation,
    typedef typename Operation::ResultType ResultType;
    typedef typename Operation::LaterReductionOperation LaterReductionOperation;
+   /***
+    * Only fundamental and pointer types can be safely reduced on host. Complex
+    * objects stored on the device might contain pointers into the device memory,
+    * in which case reduction on host might fail.
+    */
+   constexpr bool can_reduce_all_on_host = std::is_fundamental< RealType >::value || std::is_pointer< RealType >::value;
+   constexpr bool can_reduce_later_on_host = std::is_fundamental< ResultType >::value || std::is_pointer< ResultType >::value;
     * First check if the input array(s) is/are large enough for the reduction on GPU.
     * Otherwise copy it/them to host and reduce on CPU.
-   if( size <= minGPUReductionDataSize )
+   if( can_reduce_all_on_host && size <= minGPUReductionDataSize )
       RealType hostArray1[ minGPUReductionDataSize ];
       RealType hostArray2[ minGPUReductionDataSize ];
@@ -92,40 +101,68 @@ reductionOnCudaDevice( Operation& operation,
-   /***
-    * Transfer the reduced data from device to host.
-    */
-   ResultType resultArray[ reducedSize ];
-   if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ResultType, ResultType, IndexType >( resultArray, deviceAux1, reducedSize ) )
-      return false;
-      timer.stop();
-      std::cout << "   Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl;
-   #endif
+   if( can_reduce_later_on_host ) {
+      /***
+       * Transfer the reduced data from device to host.
+       */
+      ResultType resultArray[ reducedSize ];
+      if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ResultType, ResultType, IndexType >( resultArray, deviceAux1, reducedSize ) )
+         return false;
+         timer.stop();
+         std::cout << "   Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl;
+         timer.reset();
+         timer.start();
+      #endif
+      /***
+       * Reduce the data on the host system.
+       */
+      LaterReductionOperation laterReductionOperation;
+      result = laterReductionOperation. initialValue();
+      for( IndexType i = 0; i < reducedSize; i ++ )
+         result = laterReductionOperation.reduceOnHost( i, result, resultArray, ( ResultType*) 0 );
+         timer.stop();
+         std::cout << "   Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
+      #endif
+   }
+   else {
+      /***
+       * Data can't be safely reduced on host, so continue with the reduction on the CUDA device.
+       */
+      LaterReductionOperation laterReductionOperation;
+      while( reducedSize > 1 ) {
+         reducedSize = CudaReductionKernelLauncher( laterReductionOperation,
+                                                    reducedSize,
+                                                    deviceAux1,
+                                                    (ResultType*) 0,
+                                                    deviceAux1 );
+      }
+         timer.stop();
+         std::cout << "   Reduction of small data set on GPU took " << timer.getRealTime() << " sec. " << std::endl;
+         timer.reset();
+         timer.start();
+      #endif
+      ResultType resultArray[ 1 ];
+      if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ResultType, ResultType, IndexType >( resultArray, deviceAux1, reducedSize ) )
+         return false;
+      result = resultArray[ 0 ];
-      timer.reset();
-      timer.start();
-   #endif
-   /***
-    * Reduce the data on the host system.
-    */
-   LaterReductionOperation laterReductionOperation;
-   result = laterReductionOperation. initialValue();
-   for( IndexType i = 0; i < reducedSize; i ++ )
-      result = laterReductionOperation.reduceOnHost( i, result, resultArray, ( ResultType*) 0 );
-      timer.stop();
-      std::cout << "   Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
-   #endif
+         timer.stop();
+         std::cout << "   Transferring the result to CPU took " << timer.getRealTime() << " sec. " << std::endl;
+      #endif
+   }
-   return checkCudaDevice;
-   CudaSupportMissingMessage;;
-   return false;
+   throw Exceptions::CudaSupportMissing();
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Containers/Algorithms/VectorOperations.h b/src/TNL/Containers/Algorithms/VectorOperations.h
index fa490959f475fbe8a801fa33ef18767eb13d498d..fb24f97c178c8571d081b77496c91101b5a45e31 100644
--- a/src/TNL/Containers/Algorithms/VectorOperations.h
+++ b/src/TNL/Containers/Algorithms/VectorOperations.h
@@ -238,9 +238,123 @@ class VectorOperations< Devices::Cuda >
                                           const typename Vector::IndexType end );
+#ifdef HAVE_MIC
+class VectorOperations< Devices::MIC >
+   public:
+   template< typename Vector >
+   static void addElement( Vector& v,
+                           const typename Vector::IndexType i,
+                           const typename Vector::RealType& value );
+   template< typename Vector >
+   static void addElement( Vector& v,
+                           const typename Vector::IndexType i,
+                           const typename Vector::RealType& value,
+                           const typename Vector::RealType& thisElementMultiplicator );
+   template< typename Vector >
+   static typename Vector::RealType getVectorMax( const Vector& v );
+   template< typename Vector >
+   static typename Vector::RealType getVectorMin( const Vector& v );
+   template< typename Vector >
+   static typename Vector::RealType getVectorAbsMax( const Vector& v );
+   template< typename Vector >
+   static typename Vector::RealType getVectorAbsMin( const Vector& v );
+   template< typename Vector >
+   static typename Vector::RealType getVectorL1Norm( const Vector& v );
+   template< typename Vector >
+   static typename Vector::RealType getVectorL2Norm( const Vector& v );
+   template< typename Vector >
+   static typename Vector::RealType getVectorLpNorm( const Vector& v,
+                                                     const typename Vector::RealType& p );
+   template< typename Vector >
+   static typename Vector::RealType getVectorSum( const Vector& v );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceMax( const Vector1& v1,
+                                                             const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceMin( const Vector1& v1,
+                                                               const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceAbsMax( const Vector1& v1,
+                                                                  const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceAbsMin( const Vector1& v1,
+                                                                const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceL1Norm( const Vector1& v1,
+                                                                const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceL2Norm( const Vector1& v1,
+                                                                const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceLpNorm( const Vector1& v1,
+                                                           const Vector2& v2,
+                                                           const typename Vector1::RealType& p );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getVectorDifferenceSum( const Vector1& v1,
+                                                               const Vector2& v2 );
+   template< typename Vector >
+   static void vectorScalarMultiplication( Vector& v,
+                                           const typename Vector::RealType& alpha );
+   template< typename Vector1, typename Vector2 >
+   static typename Vector1::RealType getScalarProduct( const Vector1& v1,
+                                                         const Vector2& v2 );
+   template< typename Vector1, typename Vector2 >
+   static void addVector( Vector1& y,
+                          const Vector2& x,
+                          const typename Vector2::RealType& alpha,
+                          const typename Vector1::RealType& thisMultiplicator = 1.0 );
+   template< typename Vector1, typename Vector2, typename Vector3 >
+   static void addVectors( Vector1& v,
+                           const Vector2& v1,
+                           const typename Vector2::RealType& multiplicator1,
+                           const Vector3& v2,
+                           const typename Vector3::RealType& multiplicator2,
+                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
+   template< typename Vector >
+   static void computePrefixSum( Vector& v,
+                                 const typename Vector::IndexType begin,
+                                 const typename Vector::IndexType end );
+   template< typename Vector >
+   static void computeExclusivePrefixSum( Vector& v,
+                                          const typename Vector::IndexType begin,
+                                          const typename Vector::IndexType end );
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
+#ifdef HAVE_MIC
+#include <TNL/Containers/Algorithms/VectorOperationsMIC_impl.h>
 #include <TNL/Containers/Algorithms/VectorOperationsHost_impl.h>
 #include <TNL/Containers/Algorithms/VectorOperationsCuda_impl.h>
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
index 5d4b29379d62a609752d1d2cdedbdaf0b1ba662b..c32f44bbaf3a318a8d43b9116baac563b26cdf5e 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
@@ -11,6 +11,7 @@
 #pragma once
 #include <TNL/tnlConfig.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Containers/Algorithms/VectorOperations.h>
 #include <TNL/Containers/Algorithms/cuda-prefix-sum.h>
 #include <TNL/Containers/Algorithms/CublasWrapper.h>
@@ -48,7 +49,7 @@ getVectorMax( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionMax< Real, Index > operation;
@@ -68,7 +69,7 @@ getVectorMin( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionMin< Real, Index > operation;
@@ -88,7 +89,7 @@ getVectorAbsMax( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionAbsMax< Real, Index > operation;
@@ -108,7 +109,7 @@ getVectorAbsMin( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionAbsMin< Real, Index > operation;
@@ -128,7 +129,7 @@ getVectorL1Norm( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionAbsSum< Real, Index > operation;
@@ -148,7 +149,7 @@ getVectorL2Norm( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionL2Norm< Real, Index > operation;
@@ -170,9 +171,8 @@ getVectorLpNorm( const Vector& v,
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
-   TNL_ASSERT( p > 0.0,
-              std::cerr << " p = " << p );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );
    if( p == 1 )
       return getVectorL1Norm( v );
@@ -197,7 +197,7 @@ getVectorSum( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0 );
    Algorithms::tnlParallelReductionSum< Real, Index > operation;
@@ -218,8 +218,8 @@ getVectorDifferenceMax( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffMax< Real, Index > operation;
@@ -240,8 +240,8 @@ getVectorDifferenceMin( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffMin< Real, Index > operation;
@@ -263,8 +263,8 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffAbsMax< Real, Index > operation;
@@ -285,8 +285,8 @@ getVectorDifferenceAbsMin( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffAbsMin< Real, Index > operation;
@@ -307,8 +307,8 @@ getVectorDifferenceL1Norm( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffAbsSum< Real, Index > operation;
@@ -329,8 +329,8 @@ getVectorDifferenceL2Norm( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffL2Norm< Real, Index > operation;
@@ -353,10 +353,9 @@ getVectorDifferenceLpNorm( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( p > 0.0,
-              std::cerr << " p = " << p );
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
+   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffLpNorm< Real, Index > operation;
@@ -378,8 +377,8 @@ getVectorDifferenceSum( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
    Algorithms::tnlParallelReductionDiffSum< Real, Index > operation;
@@ -414,24 +413,22 @@ VectorOperations< Devices::Cuda >::
 vectorScalarMultiplication( Vector& v,
                             const typename Vector::RealType& alpha )
-   typedef typename Vector::RealType Real;
-   typedef typename Vector::IndexType Index;
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
-   TNL_ASSERT( v.getSize() > 0, );
-   #ifdef HAVE_CUDA
-      dim3 blockSize( 0 ), gridSize( 0 );
-      const Index& size = v.getSize();
-      blockSize.x = 256;
-      Index blocksNumber = ceil( ( double ) size / ( double ) blockSize.x );
-      gridSize.x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
-      vectorScalarMultiplicationCudaKernel<<< gridSize, blockSize >>>( v.getData(),
-                                                                       size,
-                                                                       alpha );
-      checkCudaDevice;
-   #else
-      CudaSupportMissingMessage;;
-   #endif
+#ifdef HAVE_CUDA
+   typedef typename Vector::IndexType Index;   
+   dim3 blockSize( 0 ), gridSize( 0 );
+   const Index& size = v.getSize();
+   blockSize.x = 256;
+   Index blocksNumber = ceil( ( double ) size / ( double ) blockSize.x );
+   gridSize.x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
+   vectorScalarMultiplicationCudaKernel<<< gridSize, blockSize >>>( v.getData(),
+                                                                    size,
+                                                                    alpha );
+   throw Exceptions::CudaSupportMissing();
@@ -444,8 +441,8 @@ getScalarProduct( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0 );
 /*#if defined HAVE_CUBLAS && defined HAVE_CUDA
@@ -498,31 +495,28 @@ addVector( Vector1& y,
            const typename Vector2::RealType& alpha,
            const typename Vector1::RealType& thisMultiplicator )
-   typedef typename Vector1::RealType Real;
-   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." );
-   TNL_ASSERT( y.getSize() > 0, );
-   TNL_ASSERT( y.getSize() == x.getSize(), );
-   TNL_ASSERT( y.getData() != 0, );
-   TNL_ASSERT( x.getData() != 0, );
-   #ifdef HAVE_CUDA
-      dim3 blockSize( 0 ), gridSize( 0 );
-      const Index& size = x.getSize();
-      dim3 cudaBlockSize( 256 );
-      dim3 cudaBlocks;
-      cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
-      vectorAddVectorCudaKernel<<< cudaBlocks, cudaBlockSize >>>( y.getData(),
-                                                                  x.getData(),
-                                                                  size,
-                                                                  alpha,
-                                                                  thisMultiplicator);
-      checkCudaDevice;
-   #else
-      CudaSupportMissingMessage;;
-   #endif
+#ifdef HAVE_CUDA
+   typedef typename Vector1::IndexType Index;
+   dim3 blockSize( 0 ), gridSize( 0 );
+   const Index& size = x.getSize();
+   dim3 cudaBlockSize( 256 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
+   vectorAddVectorCudaKernel<<< cudaBlocks, cudaBlockSize >>>( y.getData(),
+                                                               x.getData(),
+                                                               size,
+                                                               alpha,
+                                                               thisMultiplicator);
+   throw Exceptions::CudaSupportMissing();
 #ifdef HAVE_CUDA
@@ -569,37 +563,30 @@ addVectors( Vector1& v,
             const typename Vector3::RealType& multiplicator2,
             const typename Vector1::RealType& thisMultiplicator )
-   typedef typename Vector1::RealType Real;
-   typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
-   TNL_ASSERT( v.getSize() == v1.getSize(), );
-   TNL_ASSERT( v.getSize() == v2.getSize(), );
-   TNL_ASSERT( v.getData() != 0, );
-   TNL_ASSERT( v1.getData() != 0, );
-   TNL_ASSERT( v2.getData() != 0, );
-   #ifdef HAVE_CUDA
-      dim3 blockSize( 0 ), gridSize( 0 );
-      const Index& size = v.getSize();
-      dim3 cudaBlockSize( 256 );
-      dim3 cudaBlocks;
-      cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
-      vectorAddVectorsCudaKernel<<< cudaBlocks, cudaBlockSize >>>( v.getData(),
-                                                                   v1.getData(),
-                                                                   v2.getData(),
-                                                                   size,
-                                                                   multiplicator1,
-                                                                   multiplicator2,
-                                                                   thisMultiplicator);
-      checkCudaDevice;
-   #else
-      CudaSupportMissingMessage;;
-   #endif
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v.getSize(), v1.getSize(), "The vector sizes must be the same." );
+   TNL_ASSERT_EQ( v.getSize(), v2.getSize(), "The vector sizes must be the same." );
+#ifdef HAVE_CUDA
+   typedef typename Vector1::IndexType Index;   
+   dim3 blockSize( 0 ), gridSize( 0 );
+   const Index& size = v.getSize();
+   dim3 cudaBlockSize( 256 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
+   vectorAddVectorsCudaKernel<<< cudaBlocks, cudaBlockSize >>>( v.getData(),
+                                                                v1.getData(),
+                                                                v2.getData(),
+                                                                size,
+                                                                multiplicator1,
+                                                                multiplicator2,
+                                                                thisMultiplicator);
+   throw Exceptions::CudaSupportMissing();
 template< typename Vector >
@@ -609,7 +596,7 @@ computePrefixSum( Vector& v,
                   typename Vector::IndexType begin,
                   typename Vector::IndexType end )
-   #ifdef HAVE_CUDA
+#ifdef HAVE_CUDA
    typedef Algorithms::tnlParallelReductionSum< typename Vector::RealType, typename Vector::IndexType > OperationType;
    OperationType operation;
@@ -621,10 +608,10 @@ computePrefixSum( Vector& v,
                                    &v.getData()[ begin ],
                                    &v.getData()[ begin ],
-                                   Algorithms::inclusivePrefixSum );
-   #else
-      CudaSupportMissingMessage;;
-   #endif
+                                   Algorithms::PrefixSumType::inclusive );
+   throw Exceptions::CudaSupportMissing();
 template< typename Vector >
@@ -646,7 +633,7 @@ computeExclusivePrefixSum( Vector& v,
                                    &v.getData()[ begin ],
                                    &v.getData()[ begin ],
-                                   Algorithms::exclusivePrefixSum );
+                                   Algorithms::PrefixSumType::exclusive );
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
index aab2a84769c1bf745351ec1e141df1eb740f04ce..d8cbca17eae6398235a165455872d7c9284dacd9 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
@@ -48,7 +48,7 @@ getVectorMax( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result = v.getElement( 0 );
    const Index n = v.getSize();
@@ -68,7 +68,7 @@ getVectorMin( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result = v.getElement( 0 );
    const Index n = v.getSize();
@@ -88,7 +88,7 @@ getVectorAbsMax( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result = std::fabs( v.getElement( 0 ) );
    const Index n = v.getSize();
@@ -109,7 +109,7 @@ getVectorAbsMin( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result = std::fabs( v.getElement( 0 ) );
    const Index n = v.getSize();
@@ -129,7 +129,7 @@ getVectorL1Norm( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0.0 );
    const Index n = v.getSize();
@@ -149,7 +149,7 @@ getVectorL2Norm( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    const Index n = v.getSize();
@@ -212,9 +212,8 @@ getVectorLpNorm( const Vector& v,
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
-   TNL_ASSERT( p > 0.0,
-              std::cerr << " p = " << p );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );
    if( p == 1.0 )
       return getVectorL1Norm( v );
@@ -239,7 +238,7 @@ getVectorSum( const Vector& v )
    typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    Real result( 0.0 );
    const Index n = v.getSize();
@@ -260,8 +259,8 @@ getVectorDifferenceMax( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result = v1.getElement( 0 ) - v2.getElement( 0 );
    const Index n = v1.getSize();
@@ -282,8 +281,8 @@ getVectorDifferenceMin( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result = v1.getElement( 0 ) - v2.getElement( 0 );
    const Index n = v1.getSize();
@@ -304,8 +303,8 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result = std::fabs( v1.getElement( 0 ) - v2.getElement( 0 ) );
    const Index n = v1.getSize();
@@ -326,8 +325,8 @@ getVectorDifferenceAbsMin( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result = std::fabs( v1[ 0 ] - v2[ 0 ] );
    const Index n = v1.getSize();
@@ -348,8 +347,8 @@ getVectorDifferenceL1Norm( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0.0 );
    const Index n = v1.getSize();
@@ -370,8 +369,8 @@ getVectorDifferenceL2Norm( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0.0 );
    const Index n = v1.getSize();
@@ -397,10 +396,9 @@ getVectorDifferenceLpNorm( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( p > 0.0,
-              std::cerr << " p = " << p );
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
+   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );
    if( p == 1.0 )
       return getVectorDifferenceL1Norm( v1, v2 );
@@ -426,8 +424,8 @@ getVectorDifferenceSum( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    Real result( 0.0 );
    const Index n = v1.getSize();
@@ -446,10 +444,9 @@ VectorOperations< Devices::Host >::
 vectorScalarMultiplication( Vector& v,
                             const typename Vector::RealType& alpha )
-   typedef typename Vector::RealType Real;
    typedef typename Vector::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    const Index n = v.getSize();
@@ -469,8 +466,8 @@ getScalarProduct( const Vector1& v1,
    typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v1.getSize() > 0, );
-   TNL_ASSERT( v1.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
    const Index n = v1.getSize();
@@ -530,11 +527,10 @@ addVector( Vector1& y,
            const typename Vector2::RealType& alpha,
            const typename Vector1::RealType& thisMultiplicator )
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( x.getSize() > 0, );
-   TNL_ASSERT( x.getSize() == y.getSize(), );
+   TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." );
    const Index n = y.getSize();
@@ -542,6 +538,7 @@ addVector( Vector1& y,
 #ifdef __GNUC__
    // We need to get the address of the first element to avoid
    // bounds checking in TNL::Array::operator[]
+   typedef typename Vector1::RealType Real;   
          Real* Y = y.getData();
    const Real* X = x.getData();
@@ -600,12 +597,11 @@ addVectors( Vector1& v,
             const typename Vector3::RealType& multiplicator2,
             const typename Vector1::RealType& thisMultiplicator )
-   typedef typename Vector1::RealType Real;
    typedef typename Vector1::IndexType Index;
-   TNL_ASSERT( v.getSize() > 0, );
-   TNL_ASSERT( v.getSize() == v1.getSize(), );
-   TNL_ASSERT( v.getSize() == v2.getSize(), );
+   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
+   TNL_ASSERT_EQ( v.getSize(), v1.getSize(), "The vector sizes must be the same." );
+   TNL_ASSERT_EQ( v.getSize(), v2.getSize(), "The vector sizes must be the same." );
    const Index n = v.getSize();
    if( thisMultiplicator == 1.0 )
@@ -631,6 +627,7 @@ computePrefixSum( Vector& v,
    typedef typename Vector::IndexType Index;
+   // TODO: parallelize with OpenMP
    for( Index i = begin + 1; i < end; i++ )
       v[ i ] += v[ i - 1 ];
@@ -645,6 +642,7 @@ computeExclusivePrefixSum( Vector& v,
    typedef typename Vector::IndexType Index;
    typedef typename Vector::RealType Real;
+   // TODO: parallelize with OpenMP
    Real aux( v[ begin ] );
    v[ begin ] = 0.0;
    for( Index i = begin + 1; i < end; i++ )
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..ae462d7a8b7d5c7a425e243543f90b8c4444d03b
--- /dev/null
+++ b/src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h
@@ -0,0 +1,669 @@
+                          VectorOperationsMIC_impl.h  -  description
+                                by hanouvit
+                          -------------------
+    begin                : Nov 7, 2012
+    copyright            : (C) 2012 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Devices/MIC.h>
+#include <TNL/Math.h>
+namespace TNL {
+namespace Containers {
+namespace Algorithms {
+//static const int OpenMPVectorOperationsThreshold = 65536; // TODO: check this threshold
+template< typename Vector >
+VectorOperations< Devices::MIC >::
+addElement( Vector& v,
+            const typename Vector::IndexType i,
+            const typename Vector::RealType& value )
+   // v[ i ] += value;
+   //cout << "Errorous function, not clear wher should be called (device or Host)" << endl;
+   v.setElement(i,v.getElemet(i)+value);
+template< typename Vector >
+VectorOperations< Devices::MIC >::
+addElement( Vector& v,
+            const typename Vector::IndexType i,
+            const typename Vector::RealType& value,
+            const typename Vector::RealType& thisElementMultiplicator )
+   //v[ i ] = thisElementMultiplicator * v[ i ] + value;
+   //cout << "Errorous function, not clear wher should be called (device or Host)" << endl;
+   v.setElement(i,thisElementMultiplicator*v.getElemet(i)+value);
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorMax( const Vector& v )
+   //tady je možnost paralelizace
+   typename Vector::RealType result;
+   typename Vector::IndexType size=v.getSize();
+   Devices::MICHider<const typename Vector::RealType > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,size) out(result)
+   {
+      result=vct.pointer[0];
+      for(typename Vector::IndexType i=1;i<size;i++)
+      {
+         if(result<vct.pointer[i])
+            result=vct.pointer[i];
+      }
+   }
+   return result;
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorMin( const Vector& v )
+   //tady je možnost paralelizace
+   typename Vector::RealType result;
+   typename Vector::IndexType size=v.getSize();
+   Devices::MICHider<const typename Vector::RealType > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,size) out(result)
+   {
+      result=vct.pointer[0];
+      for(typename Vector::IndexType i=1;i<size;i++)
+      {
+         if(result>vct.pointer[i])
+            result=vct.pointer[i];
+      }
+   }
+   return result;
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorAbsMax( const Vector& v )
+   //tady je možnost paralelizace
+   typename Vector::RealType result;
+   typename Vector::IndexType size=v.getSize();
+   Devices::MICHider<const typename Vector::RealType > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,size) out(result)
+   {
+      result=TNL::abs(vct.pointer[0]);
+      for(typename Vector::IndexType i=1;i<size;i++)
+      {
+         if(result<TNL::abs(vct.pointer[i]))
+            result=TNL::abs(vct.pointer[i]);
+      }
+   }
+   return result;
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorAbsMin( const Vector& v )
+   //tady je možnost paralelizace
+   typename Vector::RealType result;
+   typename Vector::IndexType size=v.getSize();
+   Devices::MICHider<const typename Vector::RealType > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,size) out(result)
+   {
+      result=TNL::abs(vct.pointer[0]);
+      for(typename Vector::IndexType i=1;i<size;i++)
+      {
+         if(result>TNL::abs(vct.pointer[i]))
+            result=TNL::abs(vct.pointer[i]);
+      }
+   }
+   return result;
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorL1Norm( const Vector& v )
+   typedef typename Vector::RealType Real;
+   typedef typename Vector::IndexType Index;
+   TNL_ASSERT( v. getSize() > 0, );
+   Real result( 0.0 );
+   const Index n = v. getSize();
+   Devices::MICHider<const Real > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,n) inout(result)
+   {
+      #pragma omp parallel for reduction(+:result)// if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+      for( Index i = 0; i < n; i ++ )
+         result += TNL::abs( vct.pointer[ i ] );
+   }
+   return result;
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorL2Norm( const Vector& v )
+   typedef typename Vector::RealType Real;
+   typedef typename Vector::IndexType Index;
+   TNL_ASSERT( v. getSize() > 0, );
+   Real result( 0.0 );
+   const Index n = v. getSize();
+   Devices::MICHider<const Real > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,n) inout(result)
+   {
+      #pragma omp parallel for reduction(+:result) //if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+      for( Index i = 0; i < n; i ++ )
+      {
+         const Real& aux = vct.pointer[ i ];
+         result += aux * aux;
+      }
+   }
+   return TNL::sqrt( result );
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorLpNorm( const Vector& v,
+                 const typename Vector::RealType& p )
+   typedef typename Vector::RealType Real;
+   typedef typename Vector::IndexType Index;
+   TNL_ASSERT( v. getSize() > 0, );
+   TNL_ASSERT( p > 0.0,
+               std::cerr << " p = " << p );
+   if( p == 1.0 )
+      return getVectorL1Norm( v );
+   if( p == 2.0 )
+      return getVectorL2Norm( v );
+   Real result( 0.0 );
+   const Index n = v. getSize();
+   Devices::MICHider<const Real > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,n) inout(result)
+   {
+      #pragma omp parallel for reduction(+:result) //if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+      for( Index i = 0; i < n; i ++ )
+      {
+         result += TNL::pow( TNL::abs( vct.pointer[ i ] ), p );
+      }
+   }
+   return TNL::pow( result, 1.0 / p );
+template< typename Vector >
+typename Vector::RealType
+VectorOperations< Devices::MIC >::
+getVectorSum( const Vector& v )
+   typedef typename Vector::RealType Real;
+   typedef typename Vector::IndexType Index;
+   TNL_ASSERT( v. getSize() > 0, );
+   Real result( 0.0 );
+   const Index n = v. getSize();
+   Devices::MICHider<const Real > vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,n) inout(result)
+   {
+      #pragma omp parallel for reduction(+:result)// if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+      for( Index i = 0; i < n; i ++ )
+         result += vct.pointer[ i ] ;
+   }
+   return result;
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC>::
+getVectorDifferenceMax( const Vector1& v1,
+                        const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) out(result)
+   {
+      result = vct1.pointer[0] - vct2.pointer[0];
+      for( Index i = 1; i < n; i ++ )
+         result = TNL::max( result, vct1.pointer[ i ] - vct2.pointer[ i ] );
+   }
+   return result;
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceMin( const Vector1& v1,
+                        const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) out(result)
+   {
+      result = vct1.pointer[0] - vct2.pointer[0];
+      for( Index i = 1; i < n; i ++ )
+         result = TNL::min( result, vct1.pointer[ i ] - vct2.pointer[ i ] );
+   }
+   return result;
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceAbsMax( const Vector1& v1,
+                           const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) out(result)
+   {
+      result = TNL::abs(vct1.pointer[0] - vct2.pointer[0]);
+      for( Index i = 1; i < n; i ++ )
+         result = TNL::max( result, TNL::abs(vct1.pointer[ i ] - vct2.pointer[ i ]) );
+   }
+   return result;
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceAbsMin( const Vector1& v1,
+                           const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) out(result)
+   {
+      result = TNL::abs(vct1.pointer[0] - vct2.pointer[0]);
+      for( Index i = 1; i < n; i ++ )
+         result = TNL::min( result, TNL::abs(vct1.pointer[ i ] - vct2.pointer[ i ]) );
+   }
+   return result;
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceL1Norm( const Vector1& v1,
+                           const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real> vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) inout(result)
+   {
+      for( Index i = 0; i < n; i ++ )
+         result += TNL::abs( vct1.pointer[ i ] - vct2.pointer[ i ] );
+   }
+   return result;
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceL2Norm( const Vector1& v1,
+                           const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) inout(result)
+   {
+      for( Index i = 0; i < n; i ++ )
+      {
+         Real aux = TNL::abs( vct1.pointer[ i ] - vct2.pointer[ i ] );
+         result += aux * aux;
+      }
+   }
+   return TNL::sqrt( result );
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceLpNorm( const Vector1& v1,
+                           const Vector2& v2,
+                           const typename Vector1::RealType& p )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( p > 0.0,
+              std::cerr << " p = " << p );
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   if( p == 1.0 )
+      return getVectorDifferenceL1Norm( v1, v2 );
+   if( p == 2.0 )
+      return getVectorDifferenceL2Norm( v1, v2 );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) inout(result)
+   {
+      for( Index i = 0; i < n; i ++ )
+      {
+         result += TNL::pow( TNL::abs( vct1.pointer[ i ] - vct2.pointer[ i ] ), p );
+      }
+   }
+   return TNL::pow( result, 1.0 / p );
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getVectorDifferenceSum( const Vector1& v1,
+                        const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(n,vct1,vct2) inout(result)
+   {
+      for( Index i = 0; i < n; i ++ )
+         result +=  vct1.pointer[ i ] - vct2.pointer[ i ];
+   }
+   return result;
+template< typename Vector >
+VectorOperations< Devices::MIC >::
+vectorScalarMultiplication( Vector& v,
+                            const typename Vector::RealType& alpha )
+   typedef typename Vector::RealType Real;
+   typedef typename Vector::IndexType Index;
+   TNL_ASSERT( v. getSize() > 0, );
+   const Index n = v. getSize();
+   Devices::MICHider<Real > vct;
+   vct.pointer=v.getData();
+   Real a=alpha;
+   #pragma offload target(mic) in(vct,a,n)
+   {
+      for( Index i = 0; i < n; i ++ )
+         vct.pointer[ i ] *= a;
+   }
+template< typename Vector1, typename Vector2 >
+typename Vector1::RealType
+VectorOperations< Devices::MIC >::
+getScalarProduct( const Vector1& v1,
+                  const Vector2& v2 )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v1. getSize() > 0, );
+   TNL_ASSERT( v1. getSize() == v2. getSize(), );
+   Real result( 0.0 );
+   const Index n = v1. getSize();
+   Devices::MICHider<const Real > vct1;
+   Devices::MICHider<const Real > vct2;
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   #pragma offload target(mic) in(vct1,vct2,n) inout(result)
+   {
+      #pragma omp parallel for reduction(+:result)// if( n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold
+      for( Index i = 0; i < n; i++ )
+         result += vct1.pointer[ i ] * vct2.pointer[ i ];
+   }
+   /*Real result1( 0.0 ), result2( 0.0 ), result3( 0.0 ), result4( 0.0 ),
+        result5( 0.0 ), result6( 0.0 ), result7( 0.0 ), result8( 0.0 );
+   Index i( 0 );
+   while( i + 8 < n )
+   {
+      result1 += v1[ i ] * v2[ i ];
+      result2 += v1[ i + 1 ] * v2[ i + 1 ];
+      result3 += v1[ i + 2 ] * v2[ i + 2 ];
+      result4 += v1[ i + 3 ] * v2[ i + 3 ];
+      result5 += v1[ i + 4 ] * v2[ i + 4 ];
+      result6 += v1[ i + 5 ] * v2[ i + 5 ];
+      result7 += v1[ i + 6 ] * v2[ i + 6 ];
+      result8 += v1[ i + 7 ] * v2[ i + 7 ];
+      i += 8;
+   }
+   Real result = result1 + result2 + result3 + result4 + result5 +result6 +result7 +result8;
+   while( i < n )
+      result += v1[ i ] * v2[ i++ ];*/
+   return result;
+template< typename Vector1, typename Vector2 >
+VectorOperations< Devices::MIC >::
+addVector( Vector1& y,
+           const Vector2& x,
+           const typename Vector2::RealType& alpha,
+           const typename Vector1::RealType& thisMultiplicator )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( x. getSize() > 0, );
+   TNL_ASSERT( x. getSize() == y. getSize(), );
+   const Index n = y. getSize();
+   Devices::MICHider<Real> vct;
+   Devices::MICHider<const Real> vct2;
+   vct.pointer=y.getData();
+   vct2.pointer=x.getData();
+   Real a=alpha;
+   Real t=thisMultiplicator;
+   #pragma offload target(mic) in(vct,vct2,n,a,t)
+   {
+      for( Index i = 0; i < n; i ++ )
+         vct.pointer[ i ] = t * vct.pointer[ i ] + a * vct2.pointer[ i ];
+   }
+template< typename Vector1,
+          typename Vector2,
+          typename Vector3 >
+VectorOperations< Devices::MIC >::
+addVectors( Vector1& v,
+            const Vector2& v1,
+            const typename Vector2::RealType& multiplicator1,
+            const Vector3& v2,
+            const typename Vector3::RealType& multiplicator2,
+            const typename Vector1::RealType& thisMultiplicator )
+   typedef typename Vector1::RealType Real;
+   typedef typename Vector1::IndexType Index;
+   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT( v.getSize() == v1.getSize(), );
+   TNL_ASSERT( v.getSize() == v2.getSize(), );
+   const Index n = v. getSize();
+   Devices::MICHider<Real> vct;
+   Devices::MICHider<const Real> vct1;
+   Devices::MICHider<const Real> vct2;
+   vct.pointer=v.getData();
+   vct1.pointer=v1.getData();
+   vct2.pointer=v2.getData();
+   Real t=thisMultiplicator;
+   Real m1=multiplicator1;
+   Real m2=multiplicator2;
+   #pragma offload target(mic) in(vct,vct1,vct2,n,t,m1,m2)
+   {
+      for( Index i = 0; i < n; i ++ )
+         vct.pointer[ i ] = t * vct.pointer[ i ] + m1 * vct1.pointer[ i ] + m2 * vct2.pointer[ i ];
+   }
+template< typename Vector >
+VectorOperations< Devices::MIC >::
+computePrefixSum( Vector& v,
+                  typename Vector::IndexType begin,
+                  typename Vector::IndexType end )
+   typedef typename Vector::IndexType Index;
+   //std::cout << v.getSize()<< "    " << end <<endl;
+   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT( v.getSize() >= end, );
+   TNL_ASSERT( v.getSize() > begin, );
+   TNL_ASSERT( end > begin, );
+   Devices::MICHider<typename Vector::RealType> vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,begin,end)
+   {
+      for( Index i = begin + 1; i < end; i++ )
+         vct.pointer[ i ] += vct.pointer[ i - 1 ];
+   }
+template< typename Vector >
+VectorOperations< Devices::MIC >::
+computeExclusivePrefixSum( Vector& v,
+                           typename Vector::IndexType begin,
+                           typename Vector::IndexType end )
+   typedef typename Vector::IndexType Index;
+   typedef typename Vector::RealType Real;
+   TNL_ASSERT( v.getSize() > 0, );
+   TNL_ASSERT( v.getSize() >= end, );
+   TNL_ASSERT( v.getSize() > begin, );
+   TNL_ASSERT( begin >= 0, );
+   TNL_ASSERT( end > begin, );
+   Devices::MICHider<Real> vct;
+   vct.pointer=v.getData();
+   #pragma offload target(mic) in(vct,begin,end)
+   {
+      Real aux( vct.pointer[ begin ] );
+      vct.pointer[ begin ] = 0.0;
+      for( Index i = begin + 1; i < end; i++ )
+      {
+         Real x = vct.pointer[ i ];
+         vct.pointer[ i ] = aux;
+         aux += x;
+      }
+   }
+} // namespace Algorithms
+} // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h
index 2c7ec97c5dee2adea84d8c67bafce27fda4fc3aa..37215a99570676c04a9681ec0540f752e15a26f4 100644
--- a/src/TNL/Containers/Algorithms/cuda-prefix-sum.h
+++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum.h
@@ -14,8 +14,11 @@ namespace TNL {
 namespace Containers {
 namespace Algorithms {
-enum enumPrefixSumType { exclusivePrefixSum = 0,
-                         inclusivePrefixSum };
+enum class PrefixSumType
+   exclusive,
+   inclusive
 template< typename DataType,
           typename Operation,
@@ -25,7 +28,7 @@ bool cudaPrefixSum( const Index size,
                     const DataType *deviceInput,
                     DataType* deviceOutput,
                     const Operation& operation,
-                    const enumPrefixSumType prefixSumType = inclusivePrefixSum );
+                    const PrefixSumType prefixSumType = PrefixSumType::inclusive );
 } // namespace Algorithms
 } // namespace Containers
diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h
index 971544a024454ef488db269cbddd3b4c755d055f..85f8eace6944f27ce84e602b5333b153a0010c0d 100644
--- a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h
+++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h
@@ -11,7 +11,10 @@
 #pragma once
 #include <iostream>
+#include <TNL/Math.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
 #include <TNL/Containers/Algorithms/reduction-operations.h>
 #ifdef HAVE_CUDA
@@ -23,56 +26,56 @@ namespace Algorithms {
 template< typename DataType,
           typename Operation,
           typename Index >
-__global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumType,
-                                              Operation operation,
-                                              const Index size,
-                                              const Index elementsInBlock,
-                                              const DataType* input,
-                                              DataType* output,
-                                              DataType* auxArray )
+__global__ void
+cudaFirstPhaseBlockPrefixSum( const PrefixSumType prefixSumType,
+                              Operation operation,
+                              const Index size,
+                              const Index elementsInBlock,
+                              const DataType* input,
+                              DataType* output,
+                              DataType* auxArray )
    DataType* sharedData = TNL::Devices::Cuda::getSharedMemory< DataType >();
    DataType* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ];
-   DataType* warpSums = &auxData[ blockDim. x ];
+   DataType* warpSums = &auxData[ blockDim.x ];
-   const Index lastElementIdx = size - blockIdx. x * elementsInBlock;
+   const Index lastElementIdx = size - blockIdx.x * elementsInBlock;
    Index lastElementInBlock = ( lastElementIdx < elementsInBlock ?
                                 lastElementIdx : elementsInBlock );
     * Load data into the shared memory.
-   const Index blockOffset = blockIdx. x * elementsInBlock;
-   Index idx = threadIdx. x;
-   if( prefixSumType == exclusivePrefixSum )
+   const Index blockOffset = blockIdx.x * elementsInBlock;
+   Index idx = threadIdx.x;
+   if( prefixSumType == PrefixSumType::exclusive )
       if( idx == 0 )
          sharedData[ 0 ] = operation.initialValue();
       while( idx < elementsInBlock && blockOffset + idx < size )
          sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ];
-         idx += blockDim. x;
+         idx += blockDim.x;
       while( idx < elementsInBlock && blockOffset + idx < size )
          sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ];
-         idx += blockDim. x;
+         idx += blockDim.x;
     * Perform the sequential prefix-sum.
-   const int chunkSize = elementsInBlock / blockDim. x;
-   const int chunkOffset = threadIdx. x * chunkSize;
-   const int numberOfChunks = lastElementInBlock / chunkSize +
-                            ( lastElementInBlock % chunkSize != 0 );
+   const int chunkSize = elementsInBlock / blockDim.x;
+   const int chunkOffset = threadIdx.x * chunkSize;
+   const int numberOfChunks = roundUpDivision( lastElementInBlock, chunkSize );
    if( chunkOffset < lastElementInBlock )
-      auxData[ threadIdx. x ] =
+      auxData[ threadIdx.x ] =
          sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ];
@@ -82,22 +85,22 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT
       operation.commonReductionOnDevice( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ],
                                          sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] );
-      auxData[ threadIdx. x ] =
+      auxData[ threadIdx.x ] =
          sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer  ) ];
-      chunkPointer ++;
+      chunkPointer++;
     *  Perform the parallel prefix-sum inside warps.
-   const int threadInWarpIdx = threadIdx. x % Devices::Cuda::getWarpSize();
-   const int warpIdx = threadIdx. x / Devices::Cuda::getWarpSize();
+   const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize();
+   const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize();
    for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 )
-      if( threadInWarpIdx >= stride && threadIdx. x < numberOfChunks )
-         operation.commonReductionOnDevice( auxData[ threadIdx. x ], auxData[ threadIdx. x - stride ] );
+      if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks )
+         operation.commonReductionOnDevice( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] );
    if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 )
-      warpSums[ warpIdx ] = auxData[ threadIdx. x ];
+      warpSums[ warpIdx ] = auxData[ threadIdx.x ];
@@ -113,13 +116,13 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT
     * Shift the warp prefix-sums.
    if( warpIdx > 0 )
-      operation.commonReductionOnDevice( auxData[ threadIdx. x ], warpSums[ warpIdx - 1 ] );
+      operation.commonReductionOnDevice( auxData[ threadIdx.x ], warpSums[ warpIdx - 1 ] );
     *  Store the result back in global memory.
-   idx = threadIdx. x;
+   idx = threadIdx.x;
    while( idx < elementsInBlock && blockOffset + idx < size )
       const Index chunkIdx = idx / chunkSize;
@@ -128,49 +131,49 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT
          chunkShift = auxData[ chunkIdx - 1 ];
       operation.commonReductionOnDevice( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift );
       output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ];
-      idx += blockDim. x;
+      idx += blockDim.x;
-   if( threadIdx. x == 0 )
+   if( threadIdx.x == 0 )
-      if( prefixSumType == exclusivePrefixSum )
+      if( prefixSumType == PrefixSumType::exclusive )
-         /*auxArray[ blockIdx. x ] = operation.commonReductionOnDevice( Devices::Cuda::getInterleaving( lastElementInBlock - 1 ),
+         /*auxArray[ blockIdx.x ] = operation.commonReductionOnDevice( Devices::Cuda::getInterleaving( lastElementInBlock - 1 ),
                                                                       Devices::Cuda::getInterleaving( lastElementInBlock ),
                                                                       sharedData );*/
          DataType aux = operation.initialValue();
          operation.commonReductionOnDevice( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ] );
          operation.commonReductionOnDevice( aux, sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] );
-         auxArray[ blockIdx. x ] = aux;
+         auxArray[ blockIdx.x ] = aux;
-         auxArray[ blockIdx. x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ];
+         auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ];
 template< typename DataType,
           typename Operation,
           typename Index >
-__global__ void cudaSecondPhaseBlockPrefixSum( Operation operation,
-                                               const Index size,
-                                               const Index elementsInBlock,
-                                               const Index gridShift,
-                                               const DataType* auxArray,
-                                               DataType* data )
+__global__ void
+cudaSecondPhaseBlockPrefixSum( Operation operation,
+                               const Index size,
+                               const Index elementsInBlock,
+                               const Index gridShift,
+                               const DataType* auxArray,
+                               DataType* data )
-   if( blockIdx. x > 0 )
+   if( blockIdx.x > 0 )
       DataType shift( gridShift );
-      operation.commonReductionOnDevice( shift, auxArray[ blockIdx. x - 1 ] );
+      operation.commonReductionOnDevice( shift, auxArray[ blockIdx.x - 1 ] );
-      const Index readOffset = blockIdx. x * elementsInBlock;
-      Index readIdx = threadIdx. x;
+      const Index readOffset = blockIdx.x * elementsInBlock;
+      Index readIdx = threadIdx.x;
       while( readIdx < elementsInBlock && readOffset + readIdx < size )
          operation.commonReductionOnDevice( data[ readIdx + readOffset ], shift );
-         readIdx += blockDim. x;
+         readIdx += blockDim.x;
@@ -179,89 +182,69 @@ __global__ void cudaSecondPhaseBlockPrefixSum( Operation operation,
 template< typename DataType,
           typename Operation,
           typename Index >
-bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType,
-                             Operation& operation,
-                             const Index size,
-                             const Index blockSize,
-                             const Index elementsInBlock,
-                             const Index gridShift,
-                             const DataType* input,
-                             DataType *output )
+cudaRecursivePrefixSum( const PrefixSumType prefixSumType,
+                        Operation& operation,
+                        const Index size,
+                        const Index blockSize,
+                        const Index elementsInBlock,
+                        const Index gridShift,
+                        const DataType* input,
+                        DataType *output )
-   const Index numberOfBlocks = ceil( ( double ) size / ( double ) elementsInBlock );
+   const Index numberOfBlocks = roundUpDivision( size, elementsInBlock );
    const Index auxArraySize = numberOfBlocks * sizeof( DataType );
-   DataType *auxArray1, *auxArray2;
-   if( cudaMalloc( ( void** ) &auxArray1, auxArraySize ) != cudaSuccess ||
-       cudaMalloc( ( void** ) &auxArray2, auxArraySize ) != cudaSuccess  )
-   {
-      {
-         std::cerr << "Not enough memory on device to allocate auxilliary arrays." << std::endl;
-         return false;
-      }
-   }
+   Array< DataType, Devices::Cuda > auxArray1, auxArray2;
+   auxArray1.setSize( auxArraySize );
+   auxArray2.setSize( auxArraySize );
     * Setup block and grid size.
    dim3 cudaBlockSize( 0 ), cudaGridSize( 0 );
-   cudaBlockSize. x = blockSize;
-   cudaGridSize. x = size / elementsInBlock +
-                     ( size % elementsInBlock != 0 );
+   cudaBlockSize.x = blockSize;
+   cudaGridSize.x = roundUpDivision( size, elementsInBlock );
     * Run the kernel.
-   size_t sharedDataSize = elementsInBlock +
-                           elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2;
-   size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize()  ) * sizeof( DataType );
-   cudaFirstPhaseBlockPrefixSum< DataType, Operation, Index >
-                                <<< cudaGridSize, cudaBlockSize, sharedMemory >>>
-                                (  prefixSumType,
-                                   operation,
-                                   size,
-                                   elementsInBlock,
-                                   input,
-                                   output,
-                                   auxArray1 );
-   if( ! checkCudaDevice )
-   {
-      std::cerr << "The CUDA kernel 'cudaFirstPhaseBlockPrefixSum' ended with error." << std::endl;
-      cudaFree( auxArray1 );
-      cudaFree( auxArray2 );
-      return false;
-   }
+   const std::size_t sharedDataSize = elementsInBlock +
+                                      elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2;
+   const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize()  ) * sizeof( DataType );
+   cudaFirstPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize, sharedMemory >>>
+      ( prefixSumType,
+        operation,
+        size,
+        elementsInBlock,
+        input,
+        output,
+        auxArray1.getData() );
     * In auxArray1 there is now a sum of numbers in each block.
     * We must compute prefix-sum of auxArray1 and then shift
     * each block.
-   if( numberOfBlocks > 1 &&
-       ! cudaRecursivePrefixSum< DataType, Operation, Index >
-                               ( inclusivePrefixSum,
-                                 operation,
-                                 numberOfBlocks,
-                                 blockSize,
-                                 elementsInBlock,
-                                 0,
-                                 auxArray1,
-                                 auxArray2 ) )
-      return false;
-   cudaSecondPhaseBlockPrefixSum< DataType, Operation, Index >
-                                <<< cudaGridSize, cudaBlockSize >>>
-                                 ( operation, size, elementsInBlock, gridShift, auxArray2, output );
-   if( ! checkCudaDevice )
-   {
-      std::cerr << "The CUDA kernel 'cudaSecondPhaseBlockPrefixSum' ended with error." << std::endl;
-      cudaFree( auxArray1 );
-      cudaFree( auxArray2 );
-      return false;
-   }
-   cudaFree( auxArray1 );
-   cudaFree( auxArray2 );
-   return true;
+   if( numberOfBlocks > 1 )
+       cudaRecursivePrefixSum( PrefixSumType::inclusive,
+                               operation,
+                               numberOfBlocks,
+                               blockSize,
+                               elementsInBlock,
+                               (Index) 0,
+                               auxArray1.getData(),
+                               auxArray2.getData() );
+   cudaSecondPhaseBlockPrefixSum<<< cudaGridSize, cudaBlockSize >>>
+      ( operation,
+        size,
+        elementsInBlock,
+        gridShift,
+        auxArray2.getData(),
+        output );
@@ -269,85 +252,74 @@ bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType,
 template< typename DataType,
           typename Operation,
           typename Index >
-bool cudaGridPrefixSum( enumPrefixSumType prefixSumType,
-                        Operation& operation,
-                        const Index size,
-                        const Index blockSize,
-                        const Index elementsInBlock,
-                        const DataType *deviceInput,
-                        DataType *deviceOutput,
-                        Index& gridShift )
+cudaGridPrefixSum( PrefixSumType prefixSumType,
+                   Operation& operation,
+                   const Index size,
+                   const Index blockSize,
+                   const Index elementsInBlock,
+                   const DataType *deviceInput,
+                   DataType *deviceOutput,
+                   Index& gridShift )
-   if( ! cudaRecursivePrefixSum< DataType, Operation, Index >
-                               ( prefixSumType,
-                                 operation,
-                                 size,
-                                 blockSize,
-                                 elementsInBlock,
-                                 gridShift,
-                                 deviceInput,
-                                 deviceOutput ) )
-      return false;
-   if( cudaMemcpy( &gridShift,
-                   &deviceOutput[ size - 1 ],
-                   sizeof( DataType ),
-                   cudaMemcpyDeviceToHost ) != cudaSuccess )
-   {
-      std::cerr << "I am not able to copy data from device to host." << std::endl;
-      return false;
-   }
-   return true;
+   cudaRecursivePrefixSum( prefixSumType,
+                           operation,
+                           size,
+                           blockSize,
+                           elementsInBlock,
+                           gridShift,
+                           deviceInput,
+                           deviceOutput );
+   cudaMemcpy( &gridShift,
+               &deviceOutput[ size - 1 ],
+               sizeof( DataType ),
+               cudaMemcpyDeviceToHost );
 template< typename DataType,
           typename Operation,
           typename Index >
-bool cudaPrefixSum( const Index size,
-                    const Index blockSize,
-                    const DataType *deviceInput,
-                    DataType* deviceOutput,
-                    Operation& operation,
-                    const enumPrefixSumType prefixSumType )
+cudaPrefixSum( const Index size,
+               const Index blockSize,
+               const DataType *deviceInput,
+               DataType* deviceOutput,
+               Operation& operation,
+               const PrefixSumType prefixSumType )
     * Compute the number of grids
    const Index elementsInBlock = 8 * blockSize;
-   const Index gridSize = size / elementsInBlock + ( size % elementsInBlock != 0 );
-   const Index maxGridSize = 65536;
-   const Index gridsNumber = gridSize / maxGridSize + ( gridSize % maxGridSize != 0 );
+   const Index numberOfBlocks = roundUpDivision( size, elementsInBlock );
+   const auto maxGridSize = Devices::Cuda::getMaxGridSize();
+   const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize );
     * Loop over all grids.
-   Index gridShift( 0 );
-   for( Index gridIdx = 0; gridIdx < gridsNumber; gridIdx ++ )
+   Index gridShift = 0;
+   for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ )
        * Compute current grid size and size of data to be scanned
-      Index gridSize = ( size - gridIdx * maxGridSize * elementsInBlock ) /
-                     elementsInBlock;
       Index currentSize = size - gridIdx * maxGridSize * elementsInBlock;
-      if( gridSize > maxGridSize )
-      {
-         gridSize = maxGridSize;
+      if( currentSize / elementsInBlock > maxGridSize )
          currentSize = maxGridSize * elementsInBlock;
-      }
-      Index gridOffset = gridIdx * maxGridSize * elementsInBlock;
-      if( ! cudaGridPrefixSum< DataType, Operation, Index >
-                             ( prefixSumType,
-                               operation,
-                               currentSize,
-                               blockSize,
-                               elementsInBlock,
-                               &deviceInput[ gridOffset ],
-                               &deviceOutput[ gridOffset ],
-                               gridShift ) )
-         return false;
+      const Index gridOffset = gridIdx * maxGridSize * elementsInBlock;
+      cudaGridPrefixSum( prefixSumType,
+                         operation,
+                         currentSize,
+                         blockSize,
+                         elementsInBlock,
+                         &deviceInput[ gridOffset ],
+                         &deviceOutput[ gridOffset ],
+                         gridShift );
-   return true;
@@ -356,7 +328,7 @@ extern template bool cudaPrefixSum( const int size,
                                     const int *deviceInput,
                                     int* deviceOutput,
                                     tnlParallelReductionSum< int, int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
 extern template bool cudaPrefixSum( const int size,
@@ -364,14 +336,14 @@ extern template bool cudaPrefixSum( const int size,
                                     const float *deviceInput,
                                     float* deviceOutput,
                                     tnlParallelReductionSum< float, int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
 extern template bool cudaPrefixSum( const int size,
                                     const int blockSize,
                                     const double *deviceInput,
                                     double* deviceOutput,
                                     tnlParallelReductionSum< double, int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
 extern template bool cudaPrefixSum( const int size,
@@ -379,7 +351,7 @@ extern template bool cudaPrefixSum( const int size,
                                     const long double *deviceInput,
                                     long double* deviceOutput,
                                     tnlParallelReductionSum< long double, int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
@@ -388,7 +360,7 @@ extern template bool cudaPrefixSum( const long int size,
                                     const int *deviceInput,
                                     int* deviceOutput,
                                     tnlParallelReductionSum< int, long int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
 extern template bool cudaPrefixSum( const long int size,
@@ -396,14 +368,14 @@ extern template bool cudaPrefixSum( const long int size,
                                     const float *deviceInput,
                                     float* deviceOutput,
                                     tnlParallelReductionSum< float, long int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
 extern template bool cudaPrefixSum( const long int size,
                                     const long int blockSize,
                                     const double *deviceInput,
                                     double* deviceOutput,
                                     tnlParallelReductionSum< double, long int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
 extern template bool cudaPrefixSum( const long int size,
@@ -411,7 +383,7 @@ extern template bool cudaPrefixSum( const long int size,
                                     const long double *deviceInput,
                                     long double* deviceOutput,
                                     tnlParallelReductionSum< long double, long int >& operation,
-                                    const enumPrefixSumType prefixSumType );
+                                    const PrefixSumType prefixSumType );
diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h
index 92a3ccd6fd519b50435751e3753218b7060778a4..89e72cb8b4f87731f2b31065901bde59c4954510 100644
--- a/src/TNL/Containers/Array.h
+++ b/src/TNL/Containers/Array.h
@@ -8,7 +8,7 @@
 /* See Copyright Notice in tnl/Copyright */
-#pragma once 
+#pragma once
 #include <TNL/Object.h>
 #include <TNL/File.h>
@@ -35,18 +35,17 @@ class Array : public virtual Object
       typedef Index IndexType;
       typedef Containers::Array< Element, Devices::Host, Index > HostType;
       typedef Containers::Array< Element, Devices::Cuda, Index > CudaType;
-      typedef Containers::Array< Element, Device, Index > ThisType;
       Array( const IndexType& size );
       Array( Element* data,
-                const IndexType& size );
+             const IndexType& size );
-      Array( Array< Element, Device, Index >& array,
-                const IndexType& begin = 0,
-                const IndexType& size = 0 );
+      Array( Array& array,
+             const IndexType& begin = 0,
+             const IndexType& size = 0 );
       static String getType();
@@ -61,28 +60,28 @@ class Array : public virtual Object
        * these data are released. If the current data are not shared and the current
        * size is the same as the new one, nothing happens.
-      bool setSize( Index size );
+      void setSize( Index size );
-      template< typename Array >
-      bool setLike( const Array& array );
+      __cuda_callable__ Index getSize() const;
+      template< typename ArrayT >
+      void setLike( const ArrayT& array );
       void bind( Element* _data,
                  const Index _size );
-      template< typename Array >      
-      void bind( const Array& array,
+      template< typename ArrayT >
+      void bind( const ArrayT& array,
                  const IndexType& begin = 0,
                  const IndexType& size = 0 );
       template< int Size >
       void bind( StaticArray< Size, Element >& array );
-      void swap( Array< Element, Device, Index >& array );
+      void swap( Array& array );
       void reset();
-      __cuda_callable__ Index getSize() const;
       void setElement( const Index& i, const Element& x );
       Element getElement( const Index& i ) const;
@@ -91,10 +90,10 @@ class Array : public virtual Object
       __cuda_callable__ inline const Element& operator[] ( const Index& i ) const;
-      Array< Element, Device, Index >& operator = ( const Array< Element, Device, Index >& array );
+      Array& operator = ( const Array& array );
       template< typename ArrayT >
-      Array< Element, Device, Index >& operator = ( const ArrayT& array );
+      Array& operator = ( const ArrayT& array );
       template< typename ArrayT >
       bool operator == ( const ArrayT& array ) const;
@@ -118,20 +117,15 @@ class Array : public virtual Object
        * Every time one touches this grid touches * size * sizeof( Real ) bytes are added
        * to transfered bytes in tnlStatistics.
-   #ifdef HAVE_NOT_CXX11
-      template< typename IndexType2 >
-      void touch( IndexType2 touches = 1 ) const;
-   #else
       template< typename IndexType2 = Index >
       void touch( IndexType2 touches = 1 ) const;
-   #endif
       //! Method for saving the object to a file as a binary data.
       bool save( File& file ) const;
       //! Method for loading the object from a file as a binary data.
       bool load( File& file );
       //! This method loads data without reallocation.
        * This is useful for loading data into shared arrays.
@@ -140,17 +134,17 @@ class Array : public virtual Object
        * the size of array being loaded.
       bool boundLoad( File& file );
-      bool boundLoad( const String& fileName );
-      using Object::load;
       using Object::save;
+      using Object::load;
+      using Object::boundLoad;
       void releaseData() const;
       //!Number of elements in array
@@ -183,4 +177,3 @@ std::ostream& operator << ( std::ostream& str, const Array< Element, Device, Ind
 } // namespace TNL
 #include <TNL/Containers/Array_impl.h>
diff --git a/src/TNL/Containers/Array_impl.cpp b/src/TNL/Containers/Array_impl.cpp
index 966b0e5f35292a6a0a000b5b030108d67769b4be..c951e0526badefc0806979821e2b0decfbdc2cdf 100644
--- a/src/TNL/Containers/Array_impl.cpp
+++ b/src/TNL/Containers/Array_impl.cpp
@@ -8,13 +8,13 @@
 /* See Copyright Notice in tnl/Copyright */
 #include <TNL/Containers/Array.h>
 namespace TNL {
 namespace Containers {
 template class Array< float, Devices::Host, int >;
@@ -55,7 +55,7 @@ template class Array< long double, Devices::Cuda, long int >;
 } // namespace Containers
-} // namespace TNL
\ No newline at end of file
+} // namespace TNL
diff --git a/src/TNL/Containers/Array_impl.cu b/src/TNL/Containers/Array_impl.cu
index b14e5c30d96f7987b87595afff37d0da2ae00ac7..5c43ec36e69ccb02b2b9950d85e27f9271192b3d 100644
--- a/src/TNL/Containers/Array_impl.cu
+++ b/src/TNL/Containers/Array_impl.cu
@@ -8,13 +8,13 @@
 /* See Copyright Notice in tnl/Copyright */
 #include <TNL/Containers/Array.h>
 namespace TNL {
 namespace Containers {
 #ifdef HAVE_CUDA
 template class Array< float, Devices::Cuda, int >;
@@ -37,7 +37,7 @@ template class Array< long double, Devices::Cuda, long int >;
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/Array_impl.h b/src/TNL/Containers/Array_impl.h
index e9d34f860fa6c0dcff23e9847c311a3fe4d85e45..52504e47a31f057adf878c86e32891a1210daa7c 100644
--- a/src/TNL/Containers/Array_impl.h
+++ b/src/TNL/Containers/Array_impl.h
@@ -20,9 +20,7 @@
 #include <TNL/Containers/Array.h>
 namespace TNL {
-namespace Containers {   
-using namespace std;
+namespace Containers {
 template< typename Element,
           typename Device,
@@ -34,7 +32,7 @@ Array()
   allocationPointer( 0 ),
   referenceCounter( 0 )
 template< typename Element,
           typename Device,
@@ -74,10 +72,10 @@ Array( Array< Element, Device, Index >& array,
   allocationPointer( array.allocationPointer ),
   referenceCounter( 0 )
-   TNL_ASSERT( begin < array.getSize(),
-              std::cerr << " begin = " << begin << " array.getSize() = " << array.getSize() );
-   TNL_ASSERT( begin + size  < array.getSize(),
-              std::cerr << " begin = " << begin << " size = " << size <<  " array.getSize() = " << array.getSize() );
+   TNL_ASSERT_TRUE( array.getData(), "Empty arrays cannot be bound." );
+   TNL_ASSERT_LT( begin, array.getSize(), "Begin of array is out of bounds." );
+   TNL_ASSERT_LE( begin + size, array.getSize(), "End of array is out of bounds." );
    if( ! this->size )
       this->size = array.getSize() - begin;
    if( array.allocationPointer )
@@ -105,7 +103,7 @@ getType()
           TNL::getType< Element >() + ", " +
           Device::getDeviceType() + ", " +
           TNL::getType< Index >() + " >";
 template< typename Element,
           typename Device,
@@ -115,7 +113,7 @@ Array< Element, Device, Index >::
 getTypeVirtual() const
    return this->getType();
 template< typename Element,
           typename Device,
@@ -125,7 +123,7 @@ Array< Element, Device, Index >::
    return HostType::getType();
 template< typename Element,
           typename Device,
@@ -135,7 +133,7 @@ Array< Element, Device, Index >::
 getSerializationTypeVirtual() const
    return this->getSerializationType();
 template< typename Element,
           typename Device,
@@ -165,41 +163,48 @@ releaseData() const
 template< typename Element,
           typename Device,
           typename Index >
 Array< Element, Device, Index >::
 setSize( const Index size )
-   TNL_ASSERT( size >= 0,
-              std::cerr << "You try to set size of Array to negative value."
-                        << "New size: " << size << std::endl );
-   if( this->size == size && allocationPointer && ! referenceCounter ) return true;
+   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
+   if( this->size == size && allocationPointer && ! referenceCounter )
+      return;
-   Algorithms::ArrayOperations< Device >::allocateMemory( this->allocationPointer, size );
-   this->data = this->allocationPointer;
-   this->size = size;
-   if( size > 0 && ! this->allocationPointer )
-   {
-      cerr << "I am not able to allocate new array with size "
-           << ( double ) this->size * sizeof( ElementType ) / 1.0e9 << " GB." << endl;
-      this -> size = 0;
-      return false;
+   // Allocating zero bytes is useless. Moreover, the allocators don't behave the same way:
+   // "operator new" returns some non-zero address, the latter returns a null pointer.
+   if( size > 0 ) {
+      Algorithms::ArrayOperations< Device >::allocateMemory( this->allocationPointer, size );
+      this->data = this->allocationPointer;
+      this->size = size;
+      TNL_ASSERT_TRUE( this->allocationPointer,
+                       "This should never happen - allocator did not throw on an error." );
-   return true;
+template< typename Element,
+          typename Device,
+          typename Index >
+Array< Element, Device, Index >::
+getSize() const
+   return this -> size;
 template< typename Element,
           typename Device,
           typename Index >
    template< typename ArrayT >
 Array< Element, Device, Index >::
 setLike( const ArrayT& array )
-   TNL_ASSERT( array. getSize() >= 0,
-              std::cerr << "You try to set size of Array to negative value."
-                        << "Array size: " << array. getSize() << std::endl );
-   return setSize( array.getSize() );
+   setSize( array.getSize() );
 template< typename Element,
           typename Device,
@@ -209,6 +214,7 @@ Array< Element, Device, Index >::
 bind( Element* data,
       const Index size )
+   TNL_ASSERT_TRUE( data, "Null pointer cannot be bound." );
    this->data = data;
    this->size = size;
@@ -228,11 +234,10 @@ bind( const ArrayT& array,
    static_assert( std::is_same< Element, typename ArrayT::ElementType >::value, "ElementType of both arrays must be the same." );
    static_assert( std::is_same< Device, typename ArrayT::DeviceType >::value, "DeviceType of both arrays must be the same." );
    static_assert( std::is_same< Index, typename ArrayT::IndexType >::value, "IndexType of both arrays must be the same." );
-   TNL_ASSERT( begin <= array.getSize(),
-              std::cerr << " begin = " << begin << " array.getSize() = " << array.getSize() );
-   TNL_ASSERT( begin + size  <= array.getSize(),
-              std::cerr << " begin = " << begin << " size = " << size <<  " array.getSize() = " << array.getSize() );
+   TNL_ASSERT_TRUE( array.getData(), "Empty array cannot be bound." );
+   TNL_ASSERT_LT( begin, array.getSize(), "Begin of array is out of bounds." );
+   TNL_ASSERT_LE( begin + size, array.getSize(), "End of array is out of bounds." );
    if( size )
       this->size = size;
@@ -249,8 +254,7 @@ bind( const ArrayT& array,
-         this->referenceCounter = array.referenceCounter = new int;
-         *this->referenceCounter = 2;
+         this->referenceCounter = array.referenceCounter = new int( 2 );
          //std::cerr << "Allocating reference counter " << this->referenceCounter << std::endl;
@@ -281,7 +285,7 @@ swap( Array< Element, Device, Index >& array )
    TNL::swap( this->data, array.data );
    TNL::swap( this->allocationPointer, array.allocationPointer );
    TNL::swap( this->referenceCounter, array.referenceCounter );
 template< typename Element,
           typename Device,
@@ -291,17 +295,6 @@ Array< Element, Device, Index >::
-template< typename Element,
-          typename Device,
-          typename Index >
-Array< Element, Device, Index >::
-getSize() const
-   return this -> size;
 template< typename Element,
@@ -311,12 +304,10 @@ void
 Array< Element, Device, Index >::
 setElement( const Index& i, const Element& x )
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for setElement method in Array "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
-   return Algorithms::ArrayOperations< Device > :: setMemoryElement( &( this->data[ i ] ), x );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
+   return Algorithms::ArrayOperations< Device >::setMemoryElement( &( this->data[ i ] ), x );
 template< typename Element,
           typename Device,
@@ -325,12 +316,10 @@ Element
 Array< Element, Device, Index >::
 getElement( const Index& i ) const
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for getElement method in Array "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return Algorithms::ArrayOperations< Device >::getMemoryElement( & ( this->data[ i ] ) );
 template< typename Element,
           typename Device,
@@ -340,12 +329,10 @@ inline Element&
 Array< Element, Device, Index >::
 operator[] ( const Index& i )
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for operator[] in Array "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return this->data[ i ];
 template< typename Element,
           typename Device,
@@ -355,12 +342,10 @@ inline const Element&
 Array< Element, Device, Index >::
 operator[] ( const Index& i ) const
-   TNL_ASSERT( 0 <= i && i < this->getSize(),
-              std::cerr << "Wrong index for operator[] in Array "
-                        << " index is " << i
-                        << " and array size is " << this->getSize() );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." );
    return this->data[ i ];
 template< typename Element,
           typename Device,
@@ -369,19 +354,19 @@ Array< Element, Device, Index >&
 Array< Element, Device, Index >::
 operator = ( const Array< Element, Device, Index >& array )
-   TNL_ASSERT( array. getSize() == this->getSize(),
-              std::cerr << "Source size: " << array. getSize() << std::endl
-                        << "Target size: " << this->getSize() << std::endl );
+   //TNL_ASSERT_EQ( array.getSize(), this->getSize(), "Array sizes must be the same." );
+   if( this->getSize() != array.getSize() )
+      this->setLike( array );
    if( this->getSize() > 0 )
       Algorithms::ArrayOperations< Device >::
          template copyMemory< Element,
                               Index >
                              ( this->getData(),
-                               array. getData(),
-                               array. getSize() );
+                               array.getData(),
+                               array.getSize() );
    return ( *this );
 template< typename Element,
           typename Device,
@@ -391,19 +376,19 @@ Array< Element, Device, Index >&
 Array< Element, Device, Index >::
 operator = ( const ArrayT& array )
-   TNL_ASSERT( array. getSize() == this->getSize(),
-              std::cerr << "Source size: " << array. getSize() << std::endl
-                        << "Target size: " << this->getSize() << std::endl );
+   //TNL_ASSERT_EQ( array.getSize(), this->getSize(), "Array sizes must be the same." );
+   if( this->getSize() != array.getSize() )
+      this->setLike( array );   
    if( this->getSize() > 0 )
       Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >::
          template copyMemory< Element,
                               typename ArrayT::ElementType,
                               typename ArrayT::IndexType >
                             ( this->getData(),
-                              array. getData(),
-                              array. getSize() );
+                              array.getData(),
+                              array.getSize() );
    return ( *this );
 template< typename Element,
           typename Device,
@@ -413,7 +398,7 @@ bool
 Array< Element, Device, Index >::
 operator == ( const ArrayT& array ) const
-   if( array. getSize() != this -> getSize() )
+   if( array.getSize() != this->getSize() )
       return false;
    if( this->getSize() == 0 )
       return true;
@@ -430,7 +415,7 @@ template< typename Element,
           typename Device,
           typename Index >
    template< typename ArrayT >
-bool Array< Element, Device, Index > :: operator != ( const ArrayT& array ) const
+bool Array< Element, Device, Index >::operator != ( const ArrayT& array ) const
    return ! ( ( *this ) == array );
@@ -439,9 +424,9 @@ bool Array< Element, Device, Index > :: operator != ( const ArrayT& array ) cons
 template< typename Element,
           typename Device,
           typename Index >
-void Array< Element, Device, Index > :: setValue( const Element& e )
+void Array< Element, Device, Index >::setValue( const Element& e )
-   TNL_ASSERT( this->getData(),);
+   TNL_ASSERT_TRUE( this->getData(), "Attempted to set a value of an empty array." );
    Algorithms::ArrayOperations< Device >::setMemory( this->getData(), e, this->getSize() );
@@ -449,7 +434,7 @@ template< typename Element,
           typename Device,
           typename Index >
-const Element* Array< Element, Device, Index > :: getData() const
+const Element* Array< Element, Device, Index >::getData() const
    return this -> data;
@@ -458,7 +443,7 @@ template< typename Element,
           typename Device,
           typename Index >
-Element* Array< Element, Device, Index > :: getData()
+Element* Array< Element, Device, Index >::getData()
    return this -> data;
@@ -466,43 +451,38 @@ Element* Array< Element, Device, Index > :: getData()
 template< typename Element,
           typename Device,
           typename Index >
-Array< Element, Device, Index > :: operator bool() const
+Array< Element, Device, Index >::operator bool() const
    return data != 0;
 template< typename Element,
           typename Device,
           typename Index >
    template< typename IndexType2 >
-void Array< Element, Device, Index > :: touch( IndexType2 touches ) const
+void Array< Element, Device, Index >::touch( IndexType2 touches ) const
    //TODO: implement
 template< typename Element,
           typename Device,
           typename Index >
-bool Array< Element, Device, Index > :: save( File& file ) const
+bool Array< Element, Device, Index >::save( File& file ) const
-   if( ! Object :: save( file ) )
-      return false;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. write< const Index, Devices::Host >( &this->size ) )
+   if( ! Object::save( file ) )
       return false;
-   if( ! file. write( &this->size ) )
+   if( ! file.write( &this->size ) )
       return false;
    if( this->size != 0 && ! ArrayIO< Element, Device, Index >::save( file, this->data, this->size ) )
-      cerr << "I was not able to save " << this->getType()
-           << " with size " << this -> getSize() << endl;
+      std::cerr << "I was not able to save " << this->getType()
+           << " with size " << this -> getSize() << std::endl;
       return false;
    return true;
 template< typename Element,
           typename Device,
@@ -511,19 +491,17 @@ bool
 Array< Element, Device, Index >::
 load( File& file )
-   if( ! Object :: load( file ) )
+   if( ! Object::load( file ) )
       return false;
    Index _size;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. read< Index, Devices::Host >( &_size ) )
-      return false;
-   if( ! file. read( &_size ) )
+   if( ! file.read( &_size ) )
+   {
+      std::cerr << "Unable to read the array size." << std::endl;
       return false;
+   }
    if( _size < 0 )
-      cerr << "Error: The size " << _size << " of the file is not a positive number or zero." << endl;
+      std::cerr << "Error: The size " << _size << " of the file is not a positive number or zero." << std::endl;
       return false;
    setSize( _size );
@@ -531,8 +509,8 @@ load( File& file )
       if( ! ArrayIO< Element, Device, Index >::load( file, this->data, this->size ) )
-         cerr << "I was not able to load " << this->getType()
-                    << " with size " << this -> getSize() << endl;
+         std::cerr << "I was not able to load " << this->getType()
+                    << " with size " << this -> getSize() << std::endl;
          return false;
@@ -546,19 +524,14 @@ bool
 Array< Element, Device, Index >::
 boundLoad( File& file )
-   if( ! Object :: load( file ) )
+   if( ! Object::load( file ) )
       return false;
    Index _size;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. read< Index, Devices::Host >( &_size ) )
-      return false;
-   if( ! file. read( &_size ) )
+   if( ! file.read( &_size ) )
       return false;
    if( _size < 0 )
-      cerr << "Error: The size " << _size << " of the file is not a positive number or zero." << endl;
+      std::cerr << "Error: The size " << _size << " of the file is not a positive number or zero." << std::endl;
       return false;
    if( this->getSize() != 0 )
@@ -575,37 +548,14 @@ boundLoad( File& file )
       if( ! ArrayIO< Element, Device, Index >::load( file, this->data, this->size ) )
-         cerr << "I was not able to load " << this->getType()
-                    << " with size " << this -> getSize() << endl;
+         std::cerr << "I was not able to load " << this->getType()
+                    << " with size " << this -> getSize() << std::endl;
          return false;
    return true;
-template< typename Element,
-          typename Device,
-          typename Index >
-Array< Element, Device, Index >::
-boundLoad( const String& fileName )
-   File file;
-   if( ! file. open( fileName, tnlReadMode ) )
-   {
-      cerr << "I am not bale to open the file " << fileName << " for reading." << endl;
-      return false;
-   }
-   if( ! this->boundLoad( file ) )
-      return false;
-   if( ! file. close() )
-   {
-      cerr << "An error occurred when I was closing the file " << fileName << "." << endl;
-      return false;
-   }
-   return true;
 template< typename Element,
           typename Device,
           typename Index >
@@ -623,7 +573,7 @@ std::ostream& operator << ( std::ostream& str, const Array< Element, Device, Ind
       str << v.getElement( 0 );
       for( Index i = 1; i < v.getSize(); i++ )
-         str << ", " << v. getElement( i );
+         str << ", " << v.getElement( i );
    str << " ]";
    return str;
diff --git a/src/TNL/Containers/CMakeLists.txt b/src/TNL/Containers/CMakeLists.txt
old mode 100755
new mode 100644
index b35fa285d9623457ddd5c45a887428a55161fde1..5ec11514d37db3dbbfbf00a952a909985f8de0c2
--- a/src/TNL/Containers/CMakeLists.txt
+++ b/src/TNL/Containers/CMakeLists.txt
@@ -4,6 +4,8 @@ set( headers Array.h
+             IndexedMap.h
+             IndexedMap_impl.h
diff --git a/src/TNL/Containers/ConstSharedArray.h b/src/TNL/Containers/ConstSharedArray.h
index d9435f843739dd6e3b300a590adaabea198eeb99..ec719634f604e95ac92f06ca2259f6990972ba2e 100644
--- a/src/TNL/Containers/ConstSharedArray.h
+++ b/src/TNL/Containers/ConstSharedArray.h
@@ -91,13 +91,8 @@ class tnlConstSharedArray : public Object
     * Every time one touches this grid touches * size * sizeof( Real ) bytes are added
     * to transfered bytes in tnlStatistics.
-#ifdef HAVE_NOT_CXX11
-   template< typename IndexType2 >
-   void touch( IndexType2 touches = 1 ) const;
    template< typename IndexType2 = Index >
    void touch( IndexType2 touches = 1 ) const;
    //! Method for saving the object to a file as a binary data.
    bool save( File& file ) const;
diff --git a/src/TNL/Containers/ConstSharedArray_impl.h b/src/TNL/Containers/ConstSharedArray_impl.h
index 93b662269f064e45c8dfb5b7024f0f9fbdb27c4f..fde53b35807267d85495f34d0d90ca38f69ba358 100644
--- a/src/TNL/Containers/ConstSharedArray_impl.h
+++ b/src/TNL/Containers/ConstSharedArray_impl.h
@@ -232,11 +232,7 @@ bool tnlConstSharedArray< Element, Device, Index > :: save( File& file ) const
               std::cerr << "You try to save empty array." );
    if( ! Object :: save( file ) )
       return false;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. write< const Index, Device >( &this->size ) )
    if( ! file. write( &this->size ) )
       return false;
    if( ! file. write< Element, Device, Index >( this->data, this->size ) )
diff --git a/src/TNL/Containers/IndexedMap.h b/src/TNL/Containers/IndexedMap.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c0e2d99471726fd08ee4c2529c8ca64ce63925d
--- /dev/null
+++ b/src/TNL/Containers/IndexedMap.h
@@ -0,0 +1,82 @@
+                          IndexedMap.h  -  description
+                             -------------------
+    begin                : Feb 15, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <map>
+#include <stdexcept>
+namespace TNL {
+namespace Containers {
+template< typename Element,
+          typename Index,
+          typename Key >
+class IndexedMap
+   public:
+   typedef Element   ElementType;
+   typedef Index     IndexType;
+   typedef Key       KeyType;
+   void reset();
+   IndexType getSize() const;
+   IndexType insert( const ElementType &data );
+   bool find( const ElementType &data, IndexType& index ) const;
+   template< typename ArrayType >
+   void toArray( ArrayType& array ) const;
+   const Element& getElement( KeyType key ) const;
+   Element& getElement( KeyType key );
+   void print( std::ostream& str ) const;
+   protected:
+   struct DataWithIndex
+   {
+      // This constructor is here only because of bug in g++, we might fix it later.
+      // http://stackoverflow.com/questions/22357887/comparing-two-mapiterators-why-does-it-need-the-copy-constructor-of-stdpair
+      DataWithIndex(){};
+      DataWithIndex( const DataWithIndex& d ) : data( d.data ), index( d.index) {}
+      explicit DataWithIndex( const Element data) : data( data ) {}
+      DataWithIndex( const Element data,
+                     const Index index) : data(data), index(index) {}
+      Element data;
+      Index index;
+   };
+   typedef std::map< Key, DataWithIndex >      STDMapType;
+   typedef typename STDMapType::value_type     STDMapValueType;
+   typedef typename STDMapType::const_iterator STDMapIteratorType;
+   STDMapType map;
+template< typename Element,
+          typename Index,
+          typename Key >
+std::ostream& operator <<( std::ostream& str, IndexedMap< Element, Index, Key >& set );
+} // namespace Containers
+} // namespace TNL
+#include <TNL/Containers/IndexedMap_impl.h>
diff --git a/src/TNL/Containers/IndexedMap_impl.h b/src/TNL/Containers/IndexedMap_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..2adb8872cd488d1ea2448024cff3521928b531a4
--- /dev/null
+++ b/src/TNL/Containers/IndexedMap_impl.h
@@ -0,0 +1,111 @@
+                          IndexedMap_impl.h  -  description
+                             -------------------
+    begin                : Feb 15, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+namespace TNL {
+namespace Containers {
+template< typename Element,
+          typename Index,
+          typename Key >
+void IndexedMap< Element, Index, Key >::reset()
+   map.clear();
+template< typename Element,
+          typename Index,
+          typename Key >
+Index IndexedMap< Element, Index, Key >::getSize() const
+   return map.size();
+template< typename Element,
+          typename Index,
+          typename Key >
+Index IndexedMap< Element, Index, Key >::insert( const Element &data )
+   STDMapIteratorType iter = map.insert( STDMapValueType( Key( data ),
+                                         DataWithIndex( data, getSize() ) ) ).first;
+   return iter->second.index;
+template< typename Element,
+          typename Index,
+          typename Key >
+bool IndexedMap< Element, Index, Key >::find( const Element &data, Index& index ) const
+   STDMapIteratorType iter = map.find( Key( data ) );
+   if (iter == map.end())
+      return false;
+   index = iter->second.index;
+   return true;
+template< typename Element,
+          typename Index,
+          typename Key >
+   template<typename ArrayType>
+void IndexedMap< Element, Index, Key >::toArray( ArrayType& array ) const
+   Assert( array.getSize() == getSize(),
+              std::cerr << "array.getSize() = " << array.getSize()
+                   << " getSize() = " << getSize() );
+   for( STDMapIteratorType iter = map.begin();
+        iter != map.end();
+        ++iter)
+      array[ iter->second.index ] = iter->second.data;
+template< typename Element,
+          typename Index,
+          typename Key >
+const Element& IndexedMap< Element, Index, Key >::getElement( KeyType key ) const
+   return map[ key ];
+template< typename Element,
+          typename Index,
+          typename Key >
+Element& IndexedMap< Element, Index, Key >::getElement( KeyType key )
+   return map[ key ];
+template< typename Element,
+          typename Index,
+          typename Key >
+void IndexedMap< Element, Index, Key >::print( std::ostream& str ) const
+   STDMapIteratorType iter = map.begin();
+   str << iter->second.data;
+   iter++;
+   while( iter != map.end() )
+   {
+      str << ", " << iter->second.data;
+      iter++;
+   }
+template< typename Element,
+          typename Index,
+          typename Key >
+std::ostream& operator<<( std::ostream& str, IndexedMap< Element, Index, Key >& set )
+   set.print( str );
+   return str;
+} // namespace Containers
+} // namespace TNL
diff --git a/src/TNL/Containers/IndexedSet.h b/src/TNL/Containers/IndexedSet.h
index e2c7e887ec534ba206c559aab73e684b14a793dd..087918a516999ec8a9d012c1aa7e98d2f412a02a 100644
--- a/src/TNL/Containers/IndexedSet.h
+++ b/src/TNL/Containers/IndexedSet.h
@@ -11,70 +11,45 @@
 #pragma once
 #include <map>
-#include <stdexcept>
+#include <ostream>
 namespace TNL {
 namespace Containers {
-template< typename Element,
-          typename Index,
-          typename Key >
+template< class Key,
+          class Index,
+          class Compare = std::less< Key >,
+          class Allocator = std::allocator< std::pair< const Key, Index > > >
 class IndexedSet
-   public:
+   using map_type = std::map< Key, Index, Compare, Allocator >;
+   map_type map;
-   typedef Element   ElementType;
-   typedef Index     IndexType;
-   typedef Key       KeyType;
+   using key_type = Key;
+   using index_type = Index;
+   using value_type = typename map_type::value_type;
+   using size_type = typename map_type::size_type;
-   void reset();
+   void clear();
-   IndexType getSize() const;
+   size_type size() const;
-   IndexType insert( const ElementType &data );
+   Index insert( const Key& key );
-   bool find( const ElementType &data, IndexType& index ) const;
+   bool find( const Key& key, Index& index ) const;
-   template< typename ArrayType >
-   void toArray( ArrayType& array ) const;
+   size_type count( const Key& key ) const;
-   const Element& getElement( KeyType key ) const;
+   size_type erase( const Key& key );
-   Element& getElement( KeyType key );
    void print( std::ostream& str ) const;
-   protected:
-   struct DataWithIndex
-   {
-      // This constructor is here only because of bug in g++, we might fix it later.
-      // http://stackoverflow.com/questions/22357887/comparing-two-mapiterators-why-does-it-need-the-copy-constructor-of-stdpair
-      DataWithIndex(){};
-      DataWithIndex( const DataWithIndex& d ) : data( d.data ), index( d.index) {}
-      explicit DataWithIndex( const Element data) : data( data ) {}
-      DataWithIndex( const Element data,
-                     const Index index) : data(data), index(index) {}
-      Element data;
-      Index index;
-   };
-   typedef std::map< Key, DataWithIndex >      STDMapType;
-   typedef typename STDMapType::value_type     STDMapValueType;
-   typedef typename STDMapType::const_iterator STDMapIteratorType;
-   STDMapType map;
 template< typename Element,
-          typename Index,
-          typename Key >
-std::ostream& operator <<( std::ostream& str, IndexedSet< Element, Index, Key >& set );
+          typename Index >
+std::ostream& operator <<( std::ostream& str, IndexedSet< Element, Index >& set );
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/IndexedSet_impl.h b/src/TNL/Containers/IndexedSet_impl.h
index 54c326f65bb24c877fd001c51c8a558cabebc27b..12ac49dba43c8da77a4f201451d96a9eba583e1a 100644
--- a/src/TNL/Containers/IndexedSet_impl.h
+++ b/src/TNL/Containers/IndexedSet_impl.h
@@ -10,85 +10,83 @@
 #pragma once
+#include <TNL/Containers/IndexedSet.h>
 namespace TNL {
 namespace Containers {
-template< typename Element,
-          typename Index,
-          typename Key >
-void IndexedSet< Element, Index, Key >::reset()
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+IndexedSet< Key, Index, Compare, Allocator >::clear()
-template< typename Element,
-          typename Index,
-          typename Key >
-Index IndexedSet< Element, Index, Key >::getSize() const
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+typename IndexedSet< Key, Index, Compare, Allocator >::size_type
+IndexedSet< Key, Index, Compare, Allocator >::size() const
    return map.size();
-template< typename Element,
-          typename Index,
-          typename Key >
-Index IndexedSet< Element, Index, Key >::insert( const Element &data )
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+IndexedSet< Key, Index, Compare, Allocator >::insert( const Key& key )
-   STDMapIteratorType iter = map.insert( STDMapValueType( Key( data ),
-                                         DataWithIndex( data, getSize() ) ) ).first;
-   return iter->second.index;
+   auto iter = map.insert( value_type( key, size() ) ).first;
+   return iter->second;
-template< typename Element,
-          typename Index,
-          typename Key >
-bool IndexedSet< Element, Index, Key >::find( const Element &data, Index& index ) const
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+IndexedSet< Key, Index, Compare, Allocator >::find( const Key& key, Index& index ) const
-   STDMapIteratorType iter = map.find( Key( data ) );
-   if (iter == map.end())
+   auto iter = map.find( Key( key ) );
+   if( iter == map.end() )
       return false;
    index = iter->second.index;
    return true;
-template< typename Element,
-          typename Index,
-          typename Key >
-   template<typename ArrayType>
-void IndexedSet< Element, Index, Key >::toArray( ArrayType& array ) const
-   TNL_ASSERT( array.getSize() == getSize(),
-              std::cerr << "array.getSize() = " << array.getSize()
-                   << " getSize() = " << getSize() );
-   for( STDMapIteratorType iter = map.begin();
-        iter != map.end();
-        ++iter)
-      array[ iter->second.index ] = iter->second.data;
-template< typename Element,
-          typename Index,
-          typename Key >
-const Element& IndexedSet< Element, Index, Key >::getElement( KeyType key ) const
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+typename IndexedSet< Key, Index, Compare, Allocator >::size_type
+IndexedSet< Key, Index, Compare, Allocator >::count( const Key& key ) const
-   return map[ key ];
+   return map.count( key );
-template< typename Element,
-          typename Index,
-          typename Key >
-Element& IndexedSet< Element, Index, Key >::getElement( KeyType key )
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+typename IndexedSet< Key, Index, Compare, Allocator >::size_type
+IndexedSet< Key, Index, Compare, Allocator >::erase( const Key& key )
-   return map[ key ];
+   return map.erase( key );
-template< typename Element,
-          typename Index,
-          typename Key >
-void IndexedSet< Element, Index, Key >::print( std::ostream& str ) const
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+void IndexedSet< Key, Index, Compare, Allocator >::print( std::ostream& str ) const
-   STDMapIteratorType iter = map.begin();
+   auto iter = map.begin();
    str << iter->second.data;
    while( iter != map.end() )
@@ -98,10 +96,11 @@ void IndexedSet< Element, Index, Key >::print( std::ostream& str ) const
-template< typename Element,
-          typename Index,
-          typename Key >
-std::ostream& operator<<( std::ostream& str, IndexedSet< Element, Index, Key >& set )
+template< class Key,
+          class Index,
+          class Compare,
+          class Allocator >
+std::ostream& operator<<( std::ostream& str, IndexedSet< Key, Index, Compare, Allocator >& set )
    set.print( str );
    return str;
diff --git a/src/TNL/Containers/List.h b/src/TNL/Containers/List.h
index adb163374dc7a3e9d7bbb626605bbebeb2e0c92d..0ec42106de2b49f5296e8a0aede1328715eea3f9 100644
--- a/src/TNL/Containers/List.h
+++ b/src/TNL/Containers/List.h
@@ -10,7 +10,6 @@
 #pragma once
-#include <stdlib.h>
 #include <iostream>
 #include <TNL/Assert.h>
@@ -41,9 +40,7 @@ template< class T > class ListDataElement;
 template< class T > class List
       typedef T ElementType;
       //! Basic constructor
@@ -118,7 +115,6 @@ template< class T > class List
       bool DeepLoad( File& file );
       //! Pointer to the first element
       ListDataElement< T >* first;
@@ -135,8 +131,6 @@ template< class T > class List
       //! Iterator index
       mutable int index;
 template< typename T > std::ostream& operator << ( std::ostream& str, const List< T >& list );
diff --git a/src/TNL/Containers/List_impl.h b/src/TNL/Containers/List_impl.h
index f91f30ca2b7fd7578a109d0786bfcbc93c91a100..136f4cc986a35063a8be9f52ab4fa4e705b6c273 100644
--- a/src/TNL/Containers/List_impl.h
+++ b/src/TNL/Containers/List_impl.h
@@ -1,5 +1,5 @@
-                          tnlList_impl.h  -  description
+                          List_impl.h  -  description
     begin                : Mar, 5 Apr 2016 12:46 PM
     copyright            : (C) 2016 by Tomas Oberhuber
@@ -131,12 +131,10 @@ bool List< T >::Append( const T& data )
       TNL_ASSERT( ! last, );
       first = last = new ListDataElement< T >( data );
-      if( ! first ) return false;
       ListDataElement< T >* new_element =  new ListDataElement< T >( data, last, 0 );
-      if( ! new_element ) return false;
       TNL_ASSERT( last, );
       last = last -> Next() = new_element;
@@ -151,12 +149,10 @@ bool List< T >::Prepend( const T& data )
       TNL_ASSERT( ! last, );
       first = last = new ListDataElement< T >( data );
-      if( ! first ) return false;
       ListDataElement< T >* new_element =  new ListDataElement< T >( data, 0, first );
-      if( ! new_element ) return false;
       first = first -> Previous() = new_element;
    size ++;
@@ -175,7 +171,6 @@ bool List< T >::Insert( const T& data, const int& ind )
       new ListDataElement< T >( data,
                              iterator -> Previous(),
                              iterator );
-   if( ! new_el ) return false;
    iterator -> Previous() -> Next() = new_el;
    iterator -> Previous() = new_el;
    iterator = new_el;
@@ -208,9 +203,7 @@ template< typename T >
    template< typename Array >
 void List< T >::toArray( Array& array )
-   TNL_ASSERT( this->getSize() <= array.getSize(),
-              std::cerr << "this->getSize() = " << this->getSize()
-                   << " array.getSize() = " << array.getSize() << std::endl; );
+   array.setSize( this->getSize() );
    for( int i = 0; i < this->getSize(); i++ )
       array[ i ] = ( *this )[ i ];
@@ -281,59 +274,25 @@ void List< T >::DeepEraseAll()
 template< typename T >
 bool List< T >::Save( File& file ) const
-#ifdef HAVE_NOT_CXX11
-   file.write< const int, Devices::Host >( &size );
-   for( int i = 0; i < size; i ++ )
-      if( ! file. write< int, Devices::Host, int >( &operator[]( i ), 1 ) )
-         return false;
-   return true;
    file.write( &size );
    for( int i = 0; i < size; i ++ )
       if( ! file. write( &operator[]( i ), 1 ) )
          return false;
    return true;
 template< typename T >
 bool List< T >::DeepSave( File& file ) const
-#ifdef HAVE_NOT_CXX11
-   file. write< const int, Devices::Host >( &size );
-   for( int i = 0; i < size; i ++ )
-      if( ! operator[]( i ). save( file ) ) return false;
-   return true;
    file. write( &size );
    for( int i = 0; i < size; i ++ )
       if( ! operator[]( i ). save( file ) ) return false;
    return true;
 template< typename T >
 bool List< T >::Load( File& file )
-#ifdef HAVE_NOT_CXX11
-   reset();
-   int _size;
-   file. read< int, Devices::Host >( &_size );
-   if( _size < 0 )
-   {
-      std::cerr << "The curve size is negative." << std::endl;
-      return false;
-   }
-   T t;
-   for( int i = 0; i < _size; i ++ )
-   {
-      if( ! file. read< T, Devices::Host >( &t ) )
-         return false;
-      Append( t );
-   }
-   return true;
    int _size;
    file. read( &_size, 1 );
@@ -350,29 +309,11 @@ bool List< T >::Load( File& file )
       Append( t );
    return true;
 template< typename T >
 bool List< T >::DeepLoad( File& file )
-#ifdef HAVE_NOT_CXX11
-   reset();
-   int _size;
-   file. read< int, Devices::Host >( &_size );
-   if( _size < 0 )
-   {
-      std::cerr << "The list size is negative." << std::endl;
-      return false;
-   }
-   for( int i = 0; i < _size; i ++ )
-   {
-      T t;
-      if( ! t. load( file ) ) return false;
-      Append( t );
-   }
-   return true;
    int _size;
    file. read( &_size );
@@ -388,7 +329,6 @@ bool List< T >::DeepLoad( File& file )
       Append( t );
    return true;
 template< typename T >
diff --git a/src/TNL/Containers/MultiArray.h b/src/TNL/Containers/MultiArray.h
index 1aabf733bd3bae912818eb1768c39a2b28402c76..f9de98a81f1141f244f9492a3d3cfeb3e85bbabe 100644
--- a/src/TNL/Containers/MultiArray.h
+++ b/src/TNL/Containers/MultiArray.h
@@ -45,9 +45,9 @@ class MultiArray< 1, Element, Device, Index > : public Array< Element, Device, I
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index iSize );
+   void setDimensions( const Index iSize );
-   bool setDimensions( const Containers::StaticVector< 1, Index >& dimensions );
+   void setDimensions( const Containers::StaticVector< 1, Index >& dimensions );
    __cuda_callable__ void getDimensions( Index& iSize ) const;
@@ -55,7 +55,7 @@ class MultiArray< 1, Element, Device, Index > : public Array< Element, Device, I
    //! Set dimensions of the array using another array as a template
    template< typename MultiArray >
-   bool setLike( const MultiArray& v );
+   void setLike( const MultiArray& v );
    void reset();
@@ -124,9 +124,9 @@ class MultiArray< 2, Element, Device, Index > : public Array< Element, Device, I
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index jSize, const Index iSize );
+   void setDimensions( const Index jSize, const Index iSize );
-   bool setDimensions( const Containers::StaticVector< 2, Index >& dimensions );
+   void setDimensions( const Containers::StaticVector< 2, Index >& dimensions );
    __cuda_callable__ void getDimensions( Index& jSize, Index& iSize ) const;
@@ -134,7 +134,7 @@ class MultiArray< 2, Element, Device, Index > : public Array< Element, Device, I
    //! Set dimensions of the array using another array as a template
    template< typename MultiArray >
-   bool setLike( const MultiArray& v );
+   void setLike( const MultiArray& v );
    void reset();
@@ -207,9 +207,9 @@ class MultiArray< 3, Element, Device, Index > : public Array< Element, Device, I
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index k, const Index j, const Index iSize );
+   void setDimensions( const Index k, const Index j, const Index iSize );
-   bool setDimensions( const Containers::StaticVector< 3, Index >& dimensions );
+   void setDimensions( const Containers::StaticVector< 3, Index >& dimensions );
    __cuda_callable__ void getDimensions( Index& k, Index& j, Index& iSize ) const;
@@ -217,7 +217,7 @@ class MultiArray< 3, Element, Device, Index > : public Array< Element, Device, I
    //! Set dimensions of the array using another array as a template
    template< typename MultiArrayT >
-   bool setLike( const MultiArrayT& v );
+   void setLike( const MultiArrayT& v );
    void reset();
@@ -290,9 +290,9 @@ class MultiArray< 4, Element, Device, Index > : public Array< Element, Device, I
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index l, const Index k, const Index j, const Index iSize );
+   void setDimensions( const Index l, const Index k, const Index j, const Index iSize );
-   bool setDimensions( const Containers::StaticVector< 4, Index >& dimensions );
+   void setDimensions( const Containers::StaticVector< 4, Index >& dimensions );
    __cuda_callable__ void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const;
@@ -300,7 +300,7 @@ class MultiArray< 4, Element, Device, Index > : public Array< Element, Device, I
    //! Set dimensions of the array using another array as a template
    template< typename MultiArrayT >
-   bool setLike( const MultiArrayT& v );
+   void setLike( const MultiArrayT& v );
    void reset();
diff --git a/src/TNL/Containers/MultiArray1D_impl.h b/src/TNL/Containers/MultiArray1D_impl.h
index ec64e060053a57cbfbcae0240961df0619fc4d79..d380aa831fdba0b302dee9743de4dbb7574d3d4c 100644
--- a/src/TNL/Containers/MultiArray1D_impl.h
+++ b/src/TNL/Containers/MultiArray1D_impl.h
@@ -57,28 +57,28 @@ String MultiArray< 1, Element, Device, Index > :: getSerializationTypeVirtual()
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 1, Element, Device, Index > :: setDimensions( const Index iSize )
+void MultiArray< 1, Element, Device, Index > :: setDimensions( const Index iSize )
    TNL_ASSERT( iSize > 0,
               std::cerr << "iSize = " << iSize );
    dimensions[ 0 ] = iSize;
-   return Array< Element, Device, Index >::setSize( iSize );
+   Array< Element, Device, Index >::setSize( iSize );
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 1, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 1, Index >& dimensions )
+void MultiArray< 1, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 1, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0,
               std::cerr << " dimensions[ 0 ] = " << dimensions[ 0 ] );
    this->dimensions = dimensions;
-   return Array< Element, Device, Index >::setSize( this->dimensions[ 0 ] );
+   Array< Element, Device, Index >::setSize( this->dimensions[ 0 ] );
 template< typename Element, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 1, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 1, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
-   return setDimensions( multiArray. getDimensions() );
+   setDimensions( multiArray. getDimensions() );
 template< typename Element, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiArray2D_impl.h b/src/TNL/Containers/MultiArray2D_impl.h
index cbaca350a01a8a84fdac2924c01c137610941597..3812429c8260d75874b74b249876a5d9e8e56cf1 100644
--- a/src/TNL/Containers/MultiArray2D_impl.h
+++ b/src/TNL/Containers/MultiArray2D_impl.h
@@ -57,7 +57,7 @@ String MultiArray< 2, Element, Device, Index > :: getSerializationTypeVirtual()
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 2, Element, Device, Index > :: setDimensions( const Index jSize,
+void MultiArray< 2, Element, Device, Index > :: setDimensions( const Index jSize,
                                                                   const Index iSize )
    TNL_ASSERT( iSize > 0 && jSize > 0,
@@ -66,11 +66,11 @@ bool MultiArray< 2, Element, Device, Index > :: setDimensions( const Index jSize
    dimensions[ 0 ] = iSize;
    dimensions[ 1 ] = jSize;
-   return Array< Element, Device, Index > :: setSize( iSize * jSize );
+   Array< Element, Device, Index > :: setSize( iSize * jSize );
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 2, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 2, Index >& dimensions )
+void MultiArray< 2, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 2, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0,
               std::cerr << "dimensions = " << dimensions );
@@ -79,14 +79,14 @@ bool MultiArray< 2, Element, Device, Index > :: setDimensions( const Containers:
    this->dimensions. x() = dimensions. y();
    this->dimensions. y() = dimensions. x();
-   return Array< Element, Device, Index > :: setSize( this->dimensions[ 1 ] * this->dimensions[ 0 ] );
+   Array< Element, Device, Index > :: setSize( this->dimensions[ 1 ] * this->dimensions[ 0 ] );
 template< typename Element, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 2, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 2, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
-   return setDimensions( multiArray. getDimensions() );
+   setDimensions( multiArray. getDimensions() );
 template< typename Element, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiArray3D_impl.h b/src/TNL/Containers/MultiArray3D_impl.h
index 750efab1b71834ab058744b8eb081a4f4ed21396..6e9fb9d9f2697d997ab32522257c2b1be23185bd 100644
--- a/src/TNL/Containers/MultiArray3D_impl.h
+++ b/src/TNL/Containers/MultiArray3D_impl.h
@@ -57,7 +57,7 @@ String MultiArray< 3, Element, Device, Index > :: getSerializationTypeVirtual()
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 3, Element, Device, Index > :: setDimensions( const Index kSize,
+void MultiArray< 3, Element, Device, Index > :: setDimensions( const Index kSize,
                                                                        const Index jSize,
                                                                        const Index iSize )
@@ -69,11 +69,11 @@ bool MultiArray< 3, Element, Device, Index > :: setDimensions( const Index kSize
    dimensions[ 0 ] = iSize;
    dimensions[ 1 ] = jSize;
    dimensions[ 2 ] = kSize;
-   return Array< Element, Device, Index > :: setSize( iSize * jSize * kSize );
+   Array< Element, Device, Index > :: setSize( iSize * jSize * kSize );
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 3, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 3, Index >& dimensions )
+void MultiArray< 3, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 3, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0 && dimensions[ 2 ],
               std::cerr << "dimensions = " << dimensions );
@@ -83,16 +83,16 @@ bool MultiArray< 3, Element, Device, Index > :: setDimensions( const Containers:
    this->dimensions. x() = dimensions. z();
    this->dimensions. y() = dimensions. y();
    this->dimensions. z() = dimensions. x();
-   return Array< Element, Device, Index > :: setSize( this->dimensions[ 2 ] *
-                                                          this->dimensions[ 1 ] *
-                                                          this->dimensions[ 0 ] );
+   Array< Element, Device, Index > :: setSize( this->dimensions[ 2 ] *
+                                               this->dimensions[ 1 ] *
+                                               this->dimensions[ 0 ] );
 template< typename Element, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 3, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 3, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
-   return setDimensions( multiArray. getDimensions() );
+   setDimensions( multiArray. getDimensions() );
 template< typename Element, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiArray4D_impl.h b/src/TNL/Containers/MultiArray4D_impl.h
index 5f022247c086461bb11786fda96e4e90f25d7b42..ec034b3d21b6b8de53fa3326c0f240a5a59a8884 100644
--- a/src/TNL/Containers/MultiArray4D_impl.h
+++ b/src/TNL/Containers/MultiArray4D_impl.h
@@ -58,7 +58,7 @@ String MultiArray< 4, Element, Device, Index > :: getSerializationTypeVirtual()
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 4, Element, Device, Index > :: setDimensions( const Index lSize,
+void MultiArray< 4, Element, Device, Index > :: setDimensions( const Index lSize,
                                                                        const Index kSize,
                                                                        const Index jSize,
                                                                        const Index iSize )
@@ -73,11 +73,11 @@ bool MultiArray< 4, Element, Device, Index > :: setDimensions( const Index lSize
    dimensions[ 1 ] = jSize;
    dimensions[ 2 ] = kSize;
    dimensions[ 3 ] = lSize;
-   return Array< Element, Device, Index > :: setSize( iSize * jSize * kSize * lSize );
+   Array< Element, Device, Index > :: setSize( iSize * jSize * kSize * lSize );
 template< typename Element, typename Device, typename Index >
-bool MultiArray< 4, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 4, Index >& dimensions )
+void MultiArray< 4, Element, Device, Index > :: setDimensions( const Containers::StaticVector< 4, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0 && dimensions[ 2 ] && dimensions[ 3 ] > 0,
               std::cerr << "dimensions = " << dimensions );
@@ -88,17 +88,17 @@ bool MultiArray< 4, Element, Device, Index > :: setDimensions( const Containers:
    this->dimensions[ 1 ] = dimensions[ 2 ];
    this->dimensions[ 2 ] = dimensions[ 1 ];
    this->dimensions[ 3 ] = dimensions[ 0 ];
-   return Array< Element, Device, Index > :: setSize( this->dimensions[ 3 ] *
-                                                         this->dimensions[ 2 ] *
-                                                         this->dimensions[ 1 ] *
-                                                         this->dimensions[ 0 ] );
+   Array< Element, Device, Index > :: setSize( this->dimensions[ 3 ] *
+                                               this->dimensions[ 2 ] *
+                                               this->dimensions[ 1 ] *
+                                               this->dimensions[ 0 ] );
 template< typename Element, typename Device, typename Index >
    template< typename MultiArrayT >
-bool MultiArray< 4, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
+void MultiArray< 4, Element, Device, Index > :: setLike( const MultiArrayT& multiArray )
-   return setDimensions( multiArray. getDimensions() );
+   setDimensions( multiArray. getDimensions() );
 template< typename Element, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiVector.h b/src/TNL/Containers/MultiVector.h
index fd89f2c40cbf054ba484d01782ca5d1e1801efd6..344321ce59fc10a6a04672631f5449c92cd3ca9d 100644
--- a/src/TNL/Containers/MultiVector.h
+++ b/src/TNL/Containers/MultiVector.h
@@ -45,9 +45,9 @@ class MultiVector< 1, Real, Device, Index > : public Vector< Real, Device, Index
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index iSize );
+   void setDimensions( const Index iSize );
-   bool setDimensions( const StaticVector< Dimension, Index >& dimensions );
+   void setDimensions( const StaticVector< Dimension, Index >& dimensions );
    void getDimensions( Index& iSize ) const;
@@ -55,7 +55,7 @@ class MultiVector< 1, Real, Device, Index > : public Vector< Real, Device, Index
    //! Set dimensions of the Vector using another Vector as a template
    template< typename MultiVector >
-   bool setLike( const MultiVector& v );
+   void setLike( const MultiVector& v );
    Index getElementIndex( const Index i ) const;
@@ -126,9 +126,9 @@ class MultiVector< 2, Real, Device, Index > : public Vector< Real, Device, Index
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index jSize, const Index iSize );
+   void setDimensions( const Index jSize, const Index iSize );
-   bool setDimensions( const StaticVector< 2, Index >& dimensions );
+   void setDimensions( const StaticVector< 2, Index >& dimensions );
    void getDimensions( Index& jSize, Index& iSize ) const;
@@ -136,7 +136,7 @@ class MultiVector< 2, Real, Device, Index > : public Vector< Real, Device, Index
    //! Set dimensions of the Vector using another Vector as a template
    template< typename MultiVector >
-   bool setLike( const MultiVector& v );
+   void setLike( const MultiVector& v );
    Index getElementIndex( const Index j, const Index i ) const;
@@ -208,9 +208,9 @@ class MultiVector< 3, Real, Device, Index > : public Vector< Real, Device, Index
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index k, const Index j, const Index iSize );
+   void setDimensions( const Index k, const Index j, const Index iSize );
-   bool setDimensions( const StaticVector< 3, Index >& dimensions );
+   void setDimensions( const StaticVector< 3, Index >& dimensions );
    void getDimensions( Index& k, Index& j, Index& iSize ) const;
@@ -218,7 +218,7 @@ class MultiVector< 3, Real, Device, Index > : public Vector< Real, Device, Index
    //! Set dimensions of the Vector using another Vector as a template
    template< typename MultiVector >
-   bool setLike( const MultiVector& v );
+   void setLike( const MultiVector& v );
    Index getElementIndex( const Index k, const Index j, const Index i ) const;
@@ -290,9 +290,9 @@ class MultiVector< 4, Real, Device, Index > : public Vector< Real, Device, Index
    virtual String getSerializationTypeVirtual() const;
-   bool setDimensions( const Index l, const Index k, const Index j, const Index iSize );
+   void setDimensions( const Index l, const Index k, const Index j, const Index iSize );
-   bool setDimensions( const StaticVector< 4, Index >& dimensions );
+   void setDimensions( const StaticVector< 4, Index >& dimensions );
    void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const;
@@ -300,7 +300,7 @@ class MultiVector< 4, Real, Device, Index > : public Vector< Real, Device, Index
    //! Set dimensions of the Vector using another Vector as a template
    template< typename MultiVector >
-   bool setLike( const MultiVector& v );
+   void setLike( const MultiVector& v );
    Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const;
diff --git a/src/TNL/Containers/MultiVector1D_impl.h b/src/TNL/Containers/MultiVector1D_impl.h
index 93ee232f34c6331d716982f4be4e2358fe776050..eb0e3109f636fbe5c45f396dc63a9e3deb8d0561 100644
--- a/src/TNL/Containers/MultiVector1D_impl.h
+++ b/src/TNL/Containers/MultiVector1D_impl.h
@@ -57,28 +57,28 @@ String MultiVector< 1, Real, Device, Index > :: getSerializationTypeVirtual() co
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 1, Real, Device, Index > :: setDimensions( const Index iSize )
+void MultiVector< 1, Real, Device, Index > :: setDimensions( const Index iSize )
    TNL_ASSERT( iSize > 0,
               std::cerr << "iSize = " << iSize );
    dimensions[ 0 ] = iSize;
-   return Vector< Real, Device, Index > :: setSize( iSize );
+   Vector< Real, Device, Index > :: setSize( iSize );
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 1, Real, Device, Index > :: setDimensions( const StaticVector< Dimension, Index >& dimensions )
+void MultiVector< 1, Real, Device, Index > :: setDimensions( const StaticVector< Dimension, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0,
               std::cerr << " dimensions[ 0 ] = " << dimensions[ 0 ] );
    this->dimensions = dimensions;
-   return Vector< Real, Device, Index > :: setSize( this->dimensions[ 0 ] );
+   Vector< Real, Device, Index > :: setSize( this->dimensions[ 0 ] );
 template< typename Real, typename Device, typename Index >
    template< typename MultiVectorT >
-bool MultiVector< 1, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
+void MultiVector< 1, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
-   return setDimensions( multiVector. getDimensions() );
+   setDimensions( multiVector. getDimensions() );
 template< typename Real, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiVector2D_impl.h b/src/TNL/Containers/MultiVector2D_impl.h
index 0abbd6cc04bf19e1b3b5402d7a57d73219a30db7..7eb483e9ed122ecff6a6e4d67ce2d9d25d06fb15 100644
--- a/src/TNL/Containers/MultiVector2D_impl.h
+++ b/src/TNL/Containers/MultiVector2D_impl.h
@@ -57,8 +57,8 @@ String MultiVector< 2, Real, Device, Index > :: getSerializationTypeVirtual() co
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 2, Real, Device, Index > :: setDimensions( const Index jSize,
-                                                                       const Index iSize )
+void MultiVector< 2, Real, Device, Index > :: setDimensions( const Index jSize,
+                                                             const Index iSize )
    TNL_ASSERT( iSize > 0 && jSize > 0,
               std::cerr << "iSize = " << iSize
@@ -66,23 +66,23 @@ bool MultiVector< 2, Real, Device, Index > :: setDimensions( const Index jSize,
    dimensions[ 0 ] = iSize;
    dimensions[ 1 ] = jSize;
-   return Vector< Real, Device, Index > :: setSize( iSize * jSize );
+   Vector< Real, Device, Index > :: setSize( iSize * jSize );
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 2, Real, Device, Index > :: setDimensions( const StaticVector< 2, Index >& dimensions )
+void MultiVector< 2, Real, Device, Index > :: setDimensions( const StaticVector< 2, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0,
               std::cerr << "dimensions = " << dimensions );
    this->dimensions = dimensions;
-   return Vector< Real, Device, Index > :: setSize( this->dimensions[ 1 ] * this->dimensions[ 0 ] );
+   Vector< Real, Device, Index > :: setSize( this->dimensions[ 1 ] * this->dimensions[ 0 ] );
 template< typename Real, typename Device, typename Index >
    template< typename MultiVectorT >
-bool MultiVector< 2, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
+void MultiVector< 2, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
-   return setDimensions( multiVector. getDimensions() );
+   setDimensions( multiVector. getDimensions() );
 template< typename Real, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiVector3D_impl.h b/src/TNL/Containers/MultiVector3D_impl.h
index 0ee9304075c5c03e3a2f974535dc317904db2922..24d92f0e262a32e77b8c375f07f13bb40742d5e5 100644
--- a/src/TNL/Containers/MultiVector3D_impl.h
+++ b/src/TNL/Containers/MultiVector3D_impl.h
@@ -57,9 +57,9 @@ String MultiVector< 3, Real, Device, Index > :: getSerializationTypeVirtual() co
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 3, Real, Device, Index > :: setDimensions( const Index kSize,
-                                                                       const Index jSize,
-                                                                       const Index iSize )
+void MultiVector< 3, Real, Device, Index > :: setDimensions( const Index kSize,
+                                                             const Index jSize,
+                                                             const Index iSize )
    TNL_ASSERT( iSize > 0 && jSize > 0 && kSize > 0,
               std::cerr << "iSize = " << iSize
@@ -73,21 +73,21 @@ bool MultiVector< 3, Real, Device, Index > :: setDimensions( const Index kSize,
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 3, Real, Device, Index > :: setDimensions( const StaticVector< 3, Index >& dimensions )
+void MultiVector< 3, Real, Device, Index > :: setDimensions( const StaticVector< 3, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0 && dimensions[ 2 ],
               std::cerr << "dimensions = " << dimensions );
    this->dimensions = dimensions;
-   return Vector< Real, Device, Index > :: setSize( this->dimensions[ 2 ] *
-                                                          this->dimensions[ 1 ] *
-                                                          this->dimensions[ 0 ] );
+   Vector< Real, Device, Index > :: setSize( this->dimensions[ 2 ] *
+                                             this->dimensions[ 1 ] *
+                                             this->dimensions[ 0 ] );
 template< typename Real, typename Device, typename Index >
    template< typename MultiVectorT >
-bool MultiVector< 3, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
+void MultiVector< 3, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
-   return setDimensions( multiVector. getDimensions() );
+   setDimensions( multiVector. getDimensions() );
 template< typename Real, typename Device, typename Index >
diff --git a/src/TNL/Containers/MultiVector4D_impl.h b/src/TNL/Containers/MultiVector4D_impl.h
index 3db8a363fc55efcdf618a68b3534393aceac0a79..f9c883371f53439e9921280a844cb6c21175ff19 100644
--- a/src/TNL/Containers/MultiVector4D_impl.h
+++ b/src/TNL/Containers/MultiVector4D_impl.h
@@ -57,10 +57,10 @@ String MultiVector< 4, Real, Device, Index > :: getSerializationTypeVirtual() co
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 4, Real, Device, Index > :: setDimensions( const Index lSize,
-                                                                       const Index kSize,
-                                                                       const Index jSize,
-                                                                       const Index iSize )
+void MultiVector< 4, Real, Device, Index > :: setDimensions( const Index lSize,
+                                                             const Index kSize,
+                                                             const Index jSize,
+                                                             const Index iSize )
    TNL_ASSERT( iSize > 0 && jSize > 0 && kSize > 0 && lSize > 0,
               std::cerr << "iSize = " << iSize
@@ -72,26 +72,26 @@ bool MultiVector< 4, Real, Device, Index > :: setDimensions( const Index lSize,
    dimensions[ 1 ] = jSize;
    dimensions[ 2 ] = kSize;
    dimensions[ 3 ] = lSize;
-   return Vector< Real, Device, Index > :: setSize( iSize * jSize * kSize * lSize );
+   Vector< Real, Device, Index > :: setSize( iSize * jSize * kSize * lSize );
 template< typename Real, typename Device, typename Index >
-bool MultiVector< 4, Real, Device, Index > :: setDimensions( const StaticVector< 4, Index >& dimensions )
+void MultiVector< 4, Real, Device, Index > :: setDimensions( const StaticVector< 4, Index >& dimensions )
    TNL_ASSERT( dimensions[ 0 ] > 0 && dimensions[ 1 ] > 0 && dimensions[ 2 ] && dimensions[ 3 ] > 0,
               std::cerr << "dimensions = " << dimensions );
    this->dimensions = dimensions;
-   return Vector< Real, Device, Index > :: setSize( this->dimensions[ 3 ] *
-                                                          this->dimensions[ 2 ] *
-                                                          this->dimensions[ 1 ] *
-                                                          this->dimensions[ 0 ] );
+   Vector< Real, Device, Index > :: setSize( this->dimensions[ 3 ] *
+                                             this->dimensions[ 2 ] *
+                                             this->dimensions[ 1 ] *
+                                             this->dimensions[ 0 ] );
 template< typename Real, typename Device, typename Index >
    template< typename MultiVectorT >
-bool MultiVector< 4, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
+void MultiVector< 4, Real, Device, Index > :: setLike( const MultiVectorT& multiVector )
-   return setDimensions( multiVector. getDimensions() );
+   setDimensions( multiVector. getDimensions() );
 template< typename Real, typename Device, typename Index >
diff --git a/src/TNL/Containers/SharedArray.h b/src/TNL/Containers/SharedArray.h
index 0c4684830b9addd29e55a8d37fcbf34ceda9aafe..4feaf7862bd7fc35a17e35e69a6230466d0cdf49 100644
--- a/src/TNL/Containers/SharedArray.h
+++ b/src/TNL/Containers/SharedArray.h
@@ -44,17 +44,25 @@ class SharedArray : public Object
    typedef SharedArray< Element, Devices::Host, Index > HostType;
    typedef SharedArray< Element, Devices::Cuda, Index > CudaType;
+   #ifndef HAVE_MIC
+   #endif
+   #ifndef HAVE_MIC
+   #endif
    SharedArray( Element* _data,
                    const Index _size );
+   #ifndef HAVE_MIC
+   #endif
    SharedArray( Array< Element, Device, Index >& array );
+   #ifndef HAVE_MIC
+   #endif
    SharedArray( SharedArray< Element, Device, Index >& array );
    static String getType();
@@ -113,6 +121,7 @@ class SharedArray : public Object
    __cuda_callable__ Element* getData();
     * Returns true if non-zero size is set.
@@ -123,13 +132,8 @@ class SharedArray : public Object
     * Every time one touches this grid touches * size * sizeof( Real ) bytes are added
     * to transfered bytes in tnlStatistics.
-#ifdef HAVE_NOT_CXX11
-   template< typename IndexType2 >
-   void touch( IndexType2 touches = 1 ) const;
    template< typename IndexType2 = Index >
    void touch( IndexType2 touches = 1 ) const;
    //! Method for saving the object to a file as a binary data.
    bool save( File& file ) const;
@@ -149,6 +153,7 @@ class SharedArray : public Object
    Element* data;
 template< typename Element, typename Device, typename Index >
 std::ostream& operator << ( std::ostream& str, const SharedArray< Element, Device, Index >& v );
diff --git a/src/TNL/Containers/SharedArray_impl.h b/src/TNL/Containers/SharedArray_impl.h
index 558552a803baee2f454f617538d8a8b2d892d08f..44eb48a02177e01f4ed553fab33f5bfa6dd40100 100644
--- a/src/TNL/Containers/SharedArray_impl.h
+++ b/src/TNL/Containers/SharedArray_impl.h
@@ -21,10 +21,13 @@
 namespace TNL {
 namespace Containers {   
 template< typename Element,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedArray< Element, Device, Index >::SharedArray()
 : size( 0 ), data( 0 )
@@ -33,7 +36,9 @@ SharedArray< Element, Device, Index >::SharedArray()
 template< typename Element,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedArray< Element, Device, Index >::SharedArray( Element* _data,
                                                           const Index _size )
@@ -43,7 +48,9 @@ SharedArray< Element, Device, Index >::SharedArray( Element* _data,
 template< typename Element,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedArray< Element, Device, Index >::SharedArray( Array< Element, Device, Index >& array )
    this->bind( array );
@@ -52,7 +59,9 @@ SharedArray< Element, Device, Index >::SharedArray( Array< Element, Device, Inde
 template< typename Element,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedArray< Element, Device, Index >::SharedArray( SharedArray< Element, Device, Index >& array )
    this->bind( array );
@@ -352,11 +361,7 @@ bool SharedArray< Element, Device, Index > :: save( File& file ) const
               std::cerr << "You try to save empty array." << std::endl );
    if( ! Object :: save( file ) )
       return false;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. write< const Index, Devices::Host >( &this->size ) )
    if( ! file. write( &this->size ) )
       return false;
    if( ! file. write< Element, Device, Index >( this->data, this->size ) )
@@ -382,13 +387,8 @@ bool SharedArray< Element, Device, Index > :: load( File& file )
    if( ! Object :: load( file ) )
       return false;
    Index _size;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. read< Index, Devices::Host >( &_size ) )
-      return false;
    if( ! file. read( &_size, 1 ) )
       return false;
    if( _size != this->size )
       std::cerr << "Error: The size " << _size << " of the data to be load is different from the " <<
diff --git a/src/TNL/Containers/SharedVector.h b/src/TNL/Containers/SharedVector.h
index c4a22ded20ba860502c408c3ddd038966146064c..6526f1b4b7aac8241325f70a866a803b20e98158 100644
--- a/src/TNL/Containers/SharedVector.h
+++ b/src/TNL/Containers/SharedVector.h
@@ -39,17 +39,25 @@ class SharedVector : public Containers::SharedArray< Real, Device, Index >
    typedef SharedVector< Real, Devices::Cuda, Index > CudaType;
+   #ifndef HAVE_MIC
+   #endif
+   #ifndef HAVE_MIC
+   #endif
    SharedVector( Real* data,
                     const Index size );
+   #ifndef HAVE_MIC
+   #endif
    SharedVector( Vector< Real, Device, Index >& vector );
+   #ifndef HAVE_MIC
+   #endif
    SharedVector( SharedVector< Real, Device, Index >& vector );
    static String getType();
diff --git a/src/TNL/Containers/SharedVector_impl.h b/src/TNL/Containers/SharedVector_impl.h
index 869f0aeae455b9b3891d9147f8d3ea4285503a02..446612f133408a6560b2e89dcaf60e4d35259ed6 100644
--- a/src/TNL/Containers/SharedVector_impl.h
+++ b/src/TNL/Containers/SharedVector_impl.h
@@ -19,7 +19,9 @@ namespace Containers {
 template< typename Real,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedVector< Real, Device, Index >::SharedVector()
@@ -27,7 +29,9 @@ SharedVector< Real, Device, Index >::SharedVector()
 template< typename Real,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedVector< Real, Device, Index >::SharedVector( Real* data,
                                                          const Index size )
 : Containers::SharedArray< Real, Device, Index >( data, size )
@@ -37,7 +41,9 @@ SharedVector< Real, Device, Index >::SharedVector( Real* data,
 template< typename Real,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedVector< Real, Device, Index >::SharedVector( Vector< Real, Device, Index >& vector )
 : Containers::SharedArray< Real, Device, Index >( vector )
@@ -46,7 +52,9 @@ SharedVector< Real, Device, Index >::SharedVector( Vector< Real, Device, Index >
 template< typename Real,
           typename Device,
           typename Index >
+#ifndef HAVE_MIC
 SharedVector< Real, Device, Index >::SharedVector( SharedVector< Real, Device, Index >& vector )
 : Containers::SharedArray< Real, Device, Index >( vector )
diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h
index 63ac6be40399a2ccec1948fc54bfaf5a32181ade..32f4528dbf5aaedf1c9561c849e42747bd5f1e4a 100644
--- a/src/TNL/Containers/StaticArray.h
+++ b/src/TNL/Containers/StaticArray.h
@@ -16,10 +16,6 @@
 namespace TNL {
 namespace Containers {   
-//! Aliases for the coordinates
-// TODO: Remove this - it is here only because of some legact code
-enum { tnlX = 0, tnlY, tnlZ };
 template< int Size, typename Element >
 class StaticArray
@@ -31,6 +27,9 @@ class StaticArray
    inline StaticArray();
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    inline StaticArray( const Element v[ Size ] );
@@ -91,7 +90,6 @@ class StaticArray
    Element data[ Size ];
 template< typename Element >
@@ -105,6 +103,9 @@ class StaticArray< 1, Element >
    inline StaticArray();
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    inline StaticArray( const Element v[ size ] );
@@ -186,6 +187,9 @@ class StaticArray< 2, Element >
    inline StaticArray();
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    inline StaticArray( const Element v[ size ] );
@@ -278,6 +282,9 @@ class StaticArray< 3, Element >
    inline StaticArray();
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    inline StaticArray( const Element v[ size ] );
@@ -365,7 +372,6 @@ class StaticArray< 3, Element >
    Element data[ size ];
 template< int Size, typename Element >
@@ -378,4 +384,3 @@ std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Element
 #include <TNL/Containers/StaticArray1D_impl.h>
 #include <TNL/Containers/StaticArray2D_impl.h>
 #include <TNL/Containers/StaticArray3D_impl.h>
diff --git a/src/TNL/Containers/StaticArray1D_impl.h b/src/TNL/Containers/StaticArray1D_impl.h
index 0b368dbf58c188611fa32173cfb49b6210f98f35..a23849cca338b609ad65a3c68d99d0c712ccf099 100644
--- a/src/TNL/Containers/StaticArray1D_impl.h
+++ b/src/TNL/Containers/StaticArray1D_impl.h
@@ -11,6 +11,7 @@
 #pragma once
 #include <TNL/param-types.h>
+#include <TNL/Containers/StaticArray.h>
 namespace TNL {
 namespace Containers {   
@@ -22,6 +23,7 @@ inline StaticArray< 1, Element >::StaticArray()
 template< typename Element >
+   template< typename _unused >
 inline StaticArray< 1, Element >::StaticArray( const Element v[ size ] )
@@ -77,8 +79,8 @@ template< typename Element >
 inline const Element& StaticArray< 1, Element >::operator[]( int i ) const
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
@@ -86,8 +88,8 @@ template< typename Element >
 inline Element& StaticArray< 1, Element >::operator[]( int i )
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
@@ -171,11 +173,7 @@ bool StaticArray< 1, Element >::save( File& file ) const
 template< typename Element >
 bool StaticArray< 1, Element >::load( File& file)
-#ifdef HAVE_NOT_CXX11
-   if( ! file.read< Element, Devices::Host, int >( data, size ) )
    if( ! file.read( data, size ) )
       std::cerr << "Unable to read " << getType() << "." << std::endl;
       return false;
diff --git a/src/TNL/Containers/StaticArray2D_impl.h b/src/TNL/Containers/StaticArray2D_impl.h
index a0301a4bbf13f19abe1f78577de2c45cc2ece4c7..44ed90b0a6c42f5d9ef28cc2a72959a8c17e586e 100644
--- a/src/TNL/Containers/StaticArray2D_impl.h
+++ b/src/TNL/Containers/StaticArray2D_impl.h
@@ -12,6 +12,7 @@
 #include <TNL/param-types.h>
 #include <TNL/Math.h>
+#include <TNL/Containers/StaticArray.h>
 namespace TNL {
 namespace Containers {   
@@ -23,6 +24,7 @@ inline StaticArray< 2, Element >::StaticArray()
 template< typename Element >
+   template< typename _unused >
 inline StaticArray< 2, Element >::StaticArray( const Element v[ size ] )
@@ -89,8 +91,8 @@ template< typename Element >
 inline const Element& StaticArray< 2, Element >::operator[]( int i ) const
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
@@ -98,8 +100,8 @@ template< typename Element >
 inline Element& StaticArray< 2, Element >::operator[]( int i )
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
diff --git a/src/TNL/Containers/StaticArray3D_impl.h b/src/TNL/Containers/StaticArray3D_impl.h
index a246b2e3e6e2e1a26bca5c7c922a0b0371fb3dc1..4e89783ca84dd6279930cd92f21526a34aa54cb3 100644
--- a/src/TNL/Containers/StaticArray3D_impl.h
+++ b/src/TNL/Containers/StaticArray3D_impl.h
@@ -11,6 +11,8 @@
 #pragma once
 #include <TNL/param-types.h>
+#include <TNL/Math.h>
+#include <TNL/Containers/StaticArray.h>
 namespace TNL {
 namespace Containers {   
@@ -22,6 +24,7 @@ inline StaticArray< 3, Element >::StaticArray()
 template< typename Element >
+   template< typename _unused >
 inline StaticArray< 3, Element >::StaticArray( const Element v[ size ] )
@@ -92,8 +95,8 @@ template< typename Element >
 inline const Element& StaticArray< 3, Element >::operator[]( int i ) const
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
@@ -101,8 +104,8 @@ template< typename Element >
 inline Element& StaticArray< 3, Element >::operator[]( int i )
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
diff --git a/src/TNL/Containers/StaticArray_impl.h b/src/TNL/Containers/StaticArray_impl.h
index d651749e0f8f46012f7b968e0f8c5e6f0d1fc620..9be36764a5b8e316900db466fa1a284fcf0ac4ab 100644
--- a/src/TNL/Containers/StaticArray_impl.h
+++ b/src/TNL/Containers/StaticArray_impl.h
@@ -12,6 +12,7 @@
 #include <TNL/param-types.h>
 #include <TNL/Math.h>
+#include <TNL/Containers/StaticArray.h>
 namespace TNL {
 namespace Containers {   
@@ -23,6 +24,7 @@ inline StaticArray< Size, Element >::StaticArray()
 template< int Size, typename Element >
+   template< typename _unused >
 inline StaticArray< Size, Element >::StaticArray( const Element v[ Size ] )
@@ -81,8 +83,8 @@ template< int Size, typename Element >
 inline const Element& StaticArray< Size, Element >::operator[]( int i ) const
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
@@ -90,8 +92,8 @@ template< int Size, typename Element >
 inline Element& StaticArray< Size, Element >::operator[]( int i )
-   TNL_ASSERT( i >= 0 && i < size,
-            std::cerr << "i = " << i << " size = " << size << std::endl; );
+   TNL_ASSERT_GE( i, 0, "Element index must be non-negative." );
+   TNL_ASSERT_LT( i, size, "Element index is out of bounds." );
    return data[ i ];
@@ -203,12 +205,9 @@ std::ostream& StaticArray< Size, Element >::write( std::ostream& str, const char
 template< int Size, typename Element >
 std::ostream& operator << ( std::ostream& str, const StaticArray< Size, Element >& a )
-   a.write( str, "," );
-   /*for( int i = 0; i < Size - 1; i ++ )
-   {
-      str << a[ i ] << ", ";
-   }
-   str << a[ Size - 1 ];*/
+   str << "[ ";
+   a.write( str, ", " );
+   str << " ]";
    return str;
diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h
index a75f34d3b88f0390caec5ea0e10bd60c810bb8f0..095c9c1ffefd2c9364562f6e9a0d65ee15c33387 100644
--- a/src/TNL/Containers/StaticVector.h
+++ b/src/TNL/Containers/StaticVector.h
@@ -11,23 +11,27 @@
 #pragma once
 #include <TNL/Containers/StaticArray.h>
+#include <TNL/Config/ParameterContainer.h>
 namespace TNL {
 namespace Containers {   
 template< int Size, typename Real = double >
-class StaticVector : public Containers::StaticArray< Size, Real >
+class StaticVector : public StaticArray< Size, Real >
    typedef Real RealType;
    typedef StaticVector< Size, Real > ThisType;
    enum { size = Size };
-   using Containers::StaticArray< Size, Real >::operator=;
+   using StaticArray< Size, Real >::operator=;
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    StaticVector( const Real v[ Size ] );
@@ -38,7 +42,7 @@ class StaticVector : public Containers::StaticArray< Size, Real >
    //! Copy constructor
    StaticVector( const StaticVector< Size, Real >& v );
    bool setup( const Config::ParameterContainer& parameters,
                const String& prefix = "" );      
@@ -87,25 +91,33 @@ class StaticVector : public Containers::StaticArray< Size, Real >
    template< typename OtherReal >
    operator StaticVector< Size, OtherReal >() const;
    ThisType abs() const;
    Real lpNorm( const Real& p ) const;
 template< typename Real >
-class StaticVector< 1, Real > : public Containers::StaticArray< 1, Real >
+class StaticVector< 1, Real > : public StaticArray< 1, Real >
    typedef Real RealType;
    typedef StaticVector< 1, Real > ThisType;
    enum { size = 1 };
+   using StaticArray< 1, Real >::operator=;
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
+   __cuda_callable__
+   StaticVector( const Real v[ 1 ] );
    //! This sets all vector components to v
    StaticVector( const Real& v );
@@ -165,22 +177,27 @@ class StaticVector< 1, Real > : public Containers::StaticArray< 1, Real >
    ThisType abs() const;
    Real lpNorm( const Real& p ) const;   
 template< typename Real >
-class StaticVector< 2, Real > : public Containers::StaticArray< 2, Real >
+class StaticVector< 2, Real > : public StaticArray< 2, Real >
    typedef Real RealType;
    typedef StaticVector< 2, Real > ThisType;
    enum { size = 2 };
+   using StaticArray< 2, Real >::operator=;
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    StaticVector( const Real v[ 2 ] );
@@ -246,22 +263,27 @@ class StaticVector< 2, Real > : public Containers::StaticArray< 2, Real >
    ThisType abs() const;
    Real lpNorm( const Real& p ) const;   
 template< typename Real >
-class StaticVector< 3, Real > : public Containers::StaticArray< 3, Real >
+class StaticVector< 3, Real > : public StaticArray< 3, Real >
    typedef Real RealType;
    typedef StaticVector< 3, Real > ThisType;
    enum { size = 3 };
+   using StaticArray< 3, Real >::operator=;
+   // Note: the template avoids ambiguity of overloaded functions with literal 0 and pointer
+   // reference: https://stackoverflow.com/q/4610503
+   template< typename _unused = void >
    StaticVector( const Real v[ 3 ] );
@@ -327,7 +349,7 @@ class StaticVector< 3, Real > : public Containers::StaticArray< 3, Real >
    ThisType abs() const;
    Real lpNorm( const Real& p ) const;   
@@ -362,21 +384,21 @@ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u,
    p[ 1 ] = u[ 2 ] * v[ 0 ] - u[ 0 ] * v[ 2 ];
    p[ 2 ] = u[ 0 ] * v[ 1 ] - u[ 1 ] * v[ 0 ];
    return p;
 template< typename Real >
 Real tnlScalarProduct( const StaticVector< 2, Real >& u,
                        const StaticVector< 2, Real >& v )
    return u[ 0 ] * v[ 0 ] + u[ 1 ] * v[ 1 ];
 template< typename Real >
 Real tnlScalarProduct( const StaticVector< 3, Real >& u,
                        const StaticVector< 3, Real >& v )
    return u[ 0 ] * v[ 0 ] + u[ 1 ] * v[ 1 ] + u[ 2 ] * v[ 2 ];
 template< typename Real >
 Real tnlTriangleArea( const StaticVector< 2, Real >& a,
@@ -392,8 +414,8 @@ Real tnlTriangleArea( const StaticVector< 2, Real >& a,
    u2. z() = 0;
    const StaticVector< 3, Real > v = VectorProduct( u1, u2 );
-   return 0.5 * ::sqrt( tnlScalarProduct( v, v ) );
+   return 0.5 * TNL::sqrt( tnlScalarProduct( v, v ) );
 template< typename Real >
 Real tnlTriangleArea( const StaticVector< 3, Real >& a,
@@ -409,8 +431,8 @@ Real tnlTriangleArea( const StaticVector< 3, Real >& a,
    u2. z() = c. z() - a. z();
    const StaticVector< 3, Real > v = VectorProduct( u1, u2 );
-   return 0.5 * ::sqrt( tnlScalarProduct( v, v ) );
+   return 0.5 * TNL::sqrt( tnlScalarProduct( v, v ) );
 } // namespace Containers
 } // namespace TNL
diff --git a/src/TNL/Containers/StaticVector1D_impl.h b/src/TNL/Containers/StaticVector1D_impl.h
index ab2391eddacb43a5667d7c82d9c120a940bb1e6e..3127a621910111ff1490ccdc94675c8e737d7fc8 100644
--- a/src/TNL/Containers/StaticVector1D_impl.h
+++ b/src/TNL/Containers/StaticVector1D_impl.h
@@ -10,6 +10,8 @@
 #pragma once 
+#include <TNL/Containers/StaticVector.h>
 namespace TNL {
 namespace Containers {   
@@ -19,17 +21,25 @@ StaticVector< 1, Real >::StaticVector()
+template< typename Real >
+   template< typename _unused >
+StaticVector< 1, Real >::StaticVector( const Real v[ 1 ] )
+: StaticArray< 1, Real >( v )
 template< typename Real >
 StaticVector< 1, Real >::StaticVector( const Real& v )
-: Containers::StaticArray< 1, Real >( v )
+: StaticArray< 1, Real >( v )
 template< typename Real >
 StaticVector< 1, Real >::StaticVector( const StaticVector< 1, Real >& v )
-: Containers::StaticArray< 1, Real >( v )
+: StaticArray< 1, Real >( v )
diff --git a/src/TNL/Containers/StaticVector2D_impl.h b/src/TNL/Containers/StaticVector2D_impl.h
index d5f63f283129ae11e50fcdd4352cff91eda26ea3..d66979bf7a3be3bed89929b3d84cdf5da61199d4 100644
--- a/src/TNL/Containers/StaticVector2D_impl.h
+++ b/src/TNL/Containers/StaticVector2D_impl.h
@@ -10,7 +10,7 @@
 #pragma once 
-#include <TNL/Math.h>
+#include <TNL/Containers/StaticVector.h>
 namespace TNL {
 namespace Containers {   
@@ -22,30 +22,31 @@ StaticVector< 2, Real >::StaticVector()
 template< typename Real >
+   template< typename _unused >
 StaticVector< 2, Real >::StaticVector( const Real v[ 2 ] )
-: Containers::StaticArray< 2, Real >( v )
+: StaticArray< 2, Real >( v )
 template< typename Real >
 StaticVector< 2, Real >::StaticVector( const Real& v )
-: Containers::StaticArray< 2, Real >( v )
+: StaticArray< 2, Real >( v )
 template< typename Real >
 StaticVector< 2, Real >::StaticVector( const Real& v1, const Real& v2 )
-: Containers::StaticArray< 2, Real >( v1, v2 )
+: StaticArray< 2, Real >( v1, v2 )
 template< typename Real >
 StaticVector< 2, Real >::StaticVector( const StaticVector< 2, Real >& v )
-: Containers::StaticArray< 2, Real >( v )
+: StaticArray< 2, Real >( v )
@@ -183,8 +184,8 @@ __cuda_callable__
 StaticVector< 2, Real >
 StaticVector< 2, Real >::abs() const
-   return StaticVector< 2, Real >( ::abs( this->data[ 0 ] ),
-                                      ::abs( this->data[ 1 ] ) );
+   return StaticVector< 2, Real >( TNL::abs( this->data[ 0 ] ),
+                                   TNL::abs( this->data[ 1 ] ) );
 template< typename Real >
@@ -195,10 +196,10 @@ StaticVector< 2, Real >::lpNorm( const Real& p ) const
    if( p == 1.0 )
       return TNL::abs( this->data[ 0 ] ) + TNL::abs( this->data[ 1 ] );
    if( p == 2.0 )
-      return std::sqrt( this->data[ 0 ] * this->data[ 0 ] + 
+      return TNL::sqrt( this->data[ 0 ] * this->data[ 0 ] + 
                         this->data[ 1 ] * this->data[ 1 ] );
-   return std::pow( std::pow( TNL::abs( this->data[ 0 ] ), p ) +
-                    std::pow( TNL::abs( this->data[ 1 ] ), p ), 1.0 / p ); 
+   return TNL::pow( TNL::pow( TNL::abs( this->data[ 0 ] ), p ) +
+                    TNL::pow( TNL::abs( this->data[ 1 ] ), p ), 1.0 / p ); 
diff --git a/src/TNL/Containers/StaticVector3D_impl.h b/src/TNL/Containers/StaticVector3D_impl.h
index 68f5802e2772884305e3a6e6e50db913e1b8319d..d01e82077afd4feba62657d769803a68e1fe8fa1 100644
--- a/src/TNL/Containers/StaticVector3D_impl.h
+++ b/src/TNL/Containers/StaticVector3D_impl.h
@@ -10,6 +10,8 @@
 #pragma once
+#include <TNL/Containers/StaticVector.h>
 namespace TNL {
 namespace Containers {   
@@ -20,30 +22,31 @@ StaticVector< 3, Real >::StaticVector()
 template< typename Real >
+   template< typename _unused >
 StaticVector< 3, Real >::StaticVector( const Real v[ 3 ] )
-: Containers::StaticArray< 3, Real >( v )
+: StaticArray< 3, Real >( v )
 template< typename Real >
 StaticVector< 3, Real >::StaticVector( const Real& v )
-: Containers::StaticArray< 3, Real >( v )
+: StaticArray< 3, Real >( v )
 template< typename Real >
 StaticVector< 3, Real >::StaticVector( const Real& v1, const Real& v2, const Real& v3 )
-: Containers::StaticArray< 3, Real >( v1, v2, v3 )
+: StaticArray< 3, Real >( v1, v2, v3 )
 template< typename Real >
 StaticVector< 3, Real >::StaticVector( const StaticVector< 3, Real >& v )
-: Containers::StaticArray< 3, Real >( v )
+: StaticArray< 3, Real >( v )
@@ -175,6 +178,7 @@ bool StaticVector< 3, Real >::operator >= ( const StaticVector& v ) const
             this->data[ 1 ] >= v[ 1 ] &&
             this->data[ 2 ] >= v[ 2 ] );
 template< typename Real >
    template< typename OtherReal >
@@ -193,9 +197,9 @@ __cuda_callable__
 StaticVector< 3, Real >
 StaticVector< 3, Real >::abs() const
-   return StaticVector< 3, Real >( ::abs( this->data[ 0 ] ),
-                                      ::abs( this->data[ 1 ] ),
-                                      ::abs( this->data[ 2 ] ) );
+   return StaticVector< 3, Real >( TNL::abs( this->data[ 0 ] ),
+                                   TNL::abs( this->data[ 1 ] ),
+                                   TNL::abs( this->data[ 2 ] ) );
 template< typename Real >
@@ -208,12 +212,12 @@ StaticVector< 3, Real >::lpNorm( const Real& p ) const
              TNL::abs( this->data[ 1 ] ) + 
              TNL::abs( this->data[ 2 ] );
    if( p == 2.0 )
-      return std::sqrt( this->data[ 0 ] * this->data[ 0 ] + 
+      return TNL::sqrt( this->data[ 0 ] * this->data[ 0 ] + 
                         this->data[ 1 ] * this->data[ 1 ] +
                         this->data[ 2 ] * this->data[ 2 ] );
-   return std::pow( std::pow( TNL::abs( this->data[ 0 ] ), p ) +
-                    std::pow( TNL::abs( this->data[ 1 ] ), p ) +
-                    std::pow( TNL::abs( this->data[ 2 ] ), p ), 1.0 / p ); 
+   return TNL::pow( TNL::pow( TNL::abs( this->data[ 0 ] ), p ) +
+                    TNL::pow( TNL::abs( this->data[ 1 ] ), p ) +
+                    TNL::pow( TNL::abs( this->data[ 2 ] ), p ), 1.0 / p ); 
diff --git a/src/TNL/Containers/StaticVector_impl.h b/src/TNL/Containers/StaticVector_impl.h
index 8c8d37b9c0c0e5b3f58a5f182f75f2837eddf156..45e680a098f36aeeed127295f210dcad5d629d28 100644
--- a/src/TNL/Containers/StaticVector_impl.h
+++ b/src/TNL/Containers/StaticVector_impl.h
@@ -10,7 +10,7 @@
 #pragma once
-#include <TNL/Config/ParameterContainer.h>
+#include <TNL/Containers/StaticVector.h>
 namespace TNL {
 namespace Containers {   
@@ -22,23 +22,24 @@ StaticVector< Size, Real >::StaticVector()
 template< int Size, typename Real >
+   template< typename _unused >
 StaticVector< Size, Real >::StaticVector( const Real v[ Size ] )
-: Containers::StaticArray< Size, Real >( v )
+: StaticArray< Size, Real >( v )
 template< int Size, typename Real >
 StaticVector< Size, Real >::StaticVector( const Real& v )
-: Containers::StaticArray< Size, Real >( v )
+: StaticArray< Size, Real >( v )
 template< int Size, typename Real >
 StaticVector< Size, Real >::StaticVector( const StaticVector< Size, Real >& v )
-: Containers::StaticArray< Size, Real >( v )
+: StaticArray< Size, Real >( v )
@@ -210,12 +211,12 @@ StaticVector< Size, Real >::lpNorm( const Real& p ) const
       Real aux = this->data[ 0 ] * this->data[ 0 ];
       for( int i = 1; i < Size; i++ )
          aux += this->data[ i ] * this->data[ i ];
-      return std::sqrt( aux );
+      return TNL::sqrt( aux );
-   Real aux = std::pow( TNL::abs( this->data[ 0 ] ), p );
+   Real aux = TNL::pow( TNL::abs( this->data[ 0 ] ), p );
    for( int i = 1; i < Size; i++ )
-      aux += std::pow( TNL::abs( this->data[ i ] ), p );
-   return std::pow( aux, 1.0 / p );
+      aux += TNL::pow( TNL::abs( this->data[ i ] ), p );
+   return TNL::pow( aux, 1.0 / p );
 template< int Size, typename Real, typename Scalar >
diff --git a/src/TNL/Curve.h b/src/TNL/Curve.h
index 6704b8a98865aabf36a4b5b8ce971c32c1802ff6..f6f0408db82016340bbce152de4fdeb3111ab4f8 100644
--- a/src/TNL/Curve.h
+++ b/src/TNL/Curve.h
@@ -35,36 +35,20 @@ class CurveElement
    bool save( File& file ) const
-#ifdef HAVE_NOT_CXX11
-      if( ! file. write< const T, Devices::Host >( &position ) )
-         return false;
-      if( ! file. write< const bool, Devices::Host >( &separator ) )
-         return false;
-      return true;
       if( ! file. write( &position ) )
          return false;
       if( ! file. write( &separator ) )
          return false;
       return true;
    bool load( File& file )
-#ifdef HAVE_NOT_CXX11
-      if( ! file. read< T, Devices::Host >( &position ) )
-         return false;
-      if( ! file. read< bool, Devices::Host >( &separator ) )
-         return false;
-      return true;
       if( ! file. read( &position ) )
          return false;
       if( ! file. read( &separator ) )
          return false;
       return true;
    T position;
@@ -182,7 +166,7 @@ template< class T > bool Write( const Curve< T >& curve,
    if( strncmp( format, "tnl",3 ) == 0 )
       File file;
-      if( ! file. open( String( file_name ) + String( ".tnl" ), tnlWriteMode ) )
+      if( ! file. open( String( file_name ) + String( ".tnl" ), IOMode::write ) )
          std::cerr << "I am not able to open the file " << file_name << " for drawing curve." << std::endl;
          return false;
@@ -218,7 +202,7 @@ template< class T > bool Read( Curve< T >& crv,
                                const char* input_file )
    File file;
-   if( ! file. open( String( input_file ), tnlReadMode  ) )
+   if( ! file. open( String( input_file ), IOMode::read  ) )
      std::cout << " unable to open file " << input_file << std::endl;
       return false;
diff --git a/src/TNL/Debugging/CMakeLists.txt b/src/TNL/Debugging/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/DevicePointer.h b/src/TNL/DevicePointer.h
index 3a8bd43ff2a03e390446156952fc6571be362454..956973529cbc39ad4e9bc42366ae118298b03ca2 100644
--- a/src/TNL/DevicePointer.h
+++ b/src/TNL/DevicePointer.h
@@ -18,6 +18,8 @@
 #include <cstring>
+#include "Devices/MIC.h"
 namespace TNL {
@@ -113,11 +115,18 @@ class DevicePointer< Object, Devices::Host > : public SmartPointer
          return *( this->pointer );
-      operator bool()
+      __cuda_callable__
+      operator bool() const
          return this->pointer;
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pointer;
+      }
       template< typename Device = Devices::Host >
       const Object& getData() const
@@ -280,11 +289,18 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
          return *( this->pointer );
-      operator bool()
+      __cuda_callable__
+      operator bool() const
          return this->pd;
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pd;
+      }
       template< typename Device = Devices::Host >
       const Object& getData() const
@@ -378,7 +394,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
             TNL_ASSERT( this->pointer, );
             TNL_ASSERT( this->cuda_pointer, );
             cudaMemcpy( (void*) this->cuda_pointer, (void*) this->pointer, sizeof( ObjectType ), cudaMemcpyHostToDevice );
-            if( ! checkCudaDevice ) {
+            if( ! TNL_CHECK_CUDA_DEVICE ) {
                return false;
@@ -409,12 +425,8 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
          this->pointer = &obj;
          this->pd = new PointerData();
-         if( ! this->pd )
-            return false;
          // pass to device
          this->cuda_pointer = Devices::Cuda::passToDevice( *this->pointer );
-         if( ! this->cuda_pointer )
-            return false;
          // set last-sync state
          Devices::Cuda::insertSmartPointer( this );
@@ -462,4 +474,304 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
       Object* cuda_pointer;
+ * Specialization for MIC
+ */
+#ifdef HAVE_MIC
+template< typename Object >
+class DevicePointer< Object, Devices::MIC > : public SmartPointer
+   private:
+      // Convenient template alias for controlling the selection of copy- and
+      // move-constructors and assignment operators using SFINAE.
+      // The type Object_ is "enabled" iff Object_ and Object are not the same,
+      // but after removing const and volatile qualifiers they are the same.
+      template< typename Object_ >
+      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
+                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
+      // friend class will be needed for templated assignment operators
+      template< typename Object_, typename Device_ >
+      friend class DevicePointer;
+   public:
+      typedef Object ObjectType;
+      typedef Devices::MIC DeviceType;
+      typedef DevicePointer< Object, Devices::MIC > ThisType;
+      explicit  DevicePointer( ObjectType& obj )
+      : pointer( nullptr ),
+        pd( nullptr ),
+        mic_pointer( nullptr )
+      {
+         this->allocate( obj );
+      }
+      // this is needed only to avoid the default compiler-generated constructor
+      DevicePointer( const ThisType& pointer )
+      : pointer( pointer.pointer ),
+        pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         this->pd->counter += 1;
+      }
+      // conditional constructor for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      DevicePointer( const DevicePointer< Object_, DeviceType >& pointer )
+      : pointer( pointer.pointer ),
+        pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         this->pd->counter += 1;
+      }
+      // this is needed only to avoid the default compiler-generated constructor
+      DevicePointer( ThisType&& pointer )
+      : pointer( pointer.pointer ),
+        pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         pointer.pointer = nullptr;
+         pointer.pd = nullptr;
+         pointer.mic_pointer = nullptr;
+      }
+      // conditional constructor for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      DevicePointer( DevicePointer< Object_, DeviceType >&& pointer )
+      : pointer( pointer.pointer ),
+        pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         pointer.pointer = nullptr;
+         pointer.pd = nullptr;
+         pointer.mic_pointer = nullptr;
+      }
+      const Object* operator->() const
+      {
+         return this->pointer;
+      }
+      Object* operator->()
+      {
+         this->pd->maybe_modified = true;
+         return this->pointer;
+      }
+      const Object& operator *() const
+      {
+         return *( this->pointer );
+      }
+      Object& operator *()
+      {
+         this->pd->maybe_modified = true;
+         return *( this->pointer );
+      }
+      operator bool()
+      {
+         return this->pd;
+      }
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      const Object& getData() const
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT( this->pointer, );
+         TNL_ASSERT( this->pd, );
+         TNL_ASSERT( this->mic_pointer, );
+         if( std::is_same< Device, Devices::Host >::value )
+            return *( this->pointer );
+         if( std::is_same< Device, Devices::MIC >::value )
+            return *( this->mic_pointer );
+      }
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      Object& modifyData()
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT( this->pointer, );
+         TNL_ASSERT( this->pd, );
+         TNL_ASSERT( this->mic_pointer, );
+         if( std::is_same< Device, Devices::Host >::value )
+         {
+            this->pd->maybe_modified = true;
+            return *( this->pointer );
+         }
+         if( std::is_same< Device, Devices::MIC >::value )
+            return *( this->mic_pointer );
+      }
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( const ThisType& ptr )
+      {
+         this->free();
+         this->pointer = ptr.pointer;
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         this->pd->counter += 1;
+         return *this;
+      }
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( const DevicePointer< Object_, DeviceType >& ptr )
+      {
+         this->free();
+         this->pointer = ptr.pointer;
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         this->pd->counter += 1;
+         return *this;
+      }
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( ThisType&& ptr )
+      {
+         this->free();
+         this->pointer = ptr.pointer;
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pointer = nullptr;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+         return *this;
+      }
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( DevicePointer< Object_, DeviceType >&& ptr )
+      {
+         this->free();
+         this->pointer = ptr.pointer;
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pointer = nullptr;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+         return *this;
+      }
+      bool synchronize()
+      {
+         if( ! this->pd )
+            return true;
+         if( this->modified() )
+         {
+            TNL_ASSERT( this->pointer, );
+            TNL_ASSERT( this->mic_pointer, );
+            Devices::MIC::CopyToMIC((void*) this->mic_pointer, (void*) this->pointer, sizeof( ObjectType ));
+            this->set_last_sync_state();
+            return true;
+         }
+         return true;
+      }
+      ~DevicePointer()
+      {
+         this->free();
+         Devices::MIC::removeSmartPointer( this );
+      }
+   protected:
+      struct PointerData
+      {
+         char data_image[ sizeof(Object) ];
+         int counter = 1;
+         bool maybe_modified = false;
+      };
+      bool allocate( ObjectType& obj )
+      {
+         this->pointer = &obj;
+         this->pd = new PointerData();
+         if( ! this->pd )
+            return false;
+         // pass to device
+         this->mic_pointer = (ObjectType*)Devices::MIC::AllocMIC(sizeof(ObjectType));
+         if( ! this->mic_pointer )
+            return false;
+         Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*)this->pointer,sizeof(ObjectType));
+         // set last-sync state
+         this->set_last_sync_state();
+         Devices::MIC::insertSmartPointer( this );
+         return true;
+      }
+      void set_last_sync_state()
+      {
+         TNL_ASSERT( this->pointer, );
+         TNL_ASSERT( this->pd, );
+         std::memcpy( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) );
+         this->pd->maybe_modified = false;
+      }
+      bool modified()
+      {
+         TNL_ASSERT( this->pointer, );
+         TNL_ASSERT( this->pd, );
+         // optimization: skip bitwise comparison if we're sure that the data is the same
+         if( ! this->pd->maybe_modified )
+            return false;
+         return std::memcmp( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) ) != 0;
+      }
+      void free()
+      {
+         if( this->pd )
+         {
+            if( ! --this->pd->counter )
+            {
+               delete this->pd;
+               this->pd = nullptr;
+               if( this->mic_pointer )
+                  Devices::MIC::FreeMIC( (void*) this->mic_pointer );
+            }
+         }
+      }
+      Object* pointer;
+      PointerData* pd;
+      // mic_pointer can't be part of PointerData structure, since we would be
+      // unable to dereference this-pd on the device
+      Object* mic_pointer;
+#if  (!defined(NDEBUG)) && (!defined(HAVE_MIC)) 
+namespace Assert {
+template< typename Object, typename Device >
+struct Formatter< DevicePointer< Object, Device > >
+   static std::string
+   printToString( const DevicePointer< Object, Device >& value )
+   {
+      ::std::stringstream ss;
+      ss << "(DevicePointer< " << Object::getType() << ", " << Device::getDeviceType()
+         << " > object at " << &value << ")";
+      return ss.str();
+   }
+} // namespace Assert
 } // namespace TNL
diff --git a/src/TNL/Devices/CMakeLists.txt b/src/TNL/Devices/CMakeLists.txt
old mode 100755
new mode 100644
index f09e2b47947ea21af93619167ea415f6b62491e6..7b4babe28e75b075edecad776b68cd18983aa0bb
--- a/src/TNL/Devices/CMakeLists.txt
+++ b/src/TNL/Devices/CMakeLists.txt
@@ -1,12 +1,15 @@
 set (headers Cuda.h
+             CudaCallable.h
-             Host.h )
+             Host.h
+             MIC.h )
 set( common_SOURCES
-     ${CURRENT_DIR}/Host.cpp )
+     ${CURRENT_DIR}/Host.cpp 
+     ${CURRENT_DIR}/MIC.cpp )
    set( tnl_devices_CUDA__SOURCES
diff --git a/src/TNL/Devices/Cuda.cu b/src/TNL/Devices/Cuda.cu
index 0e59e2da5c75ede9f037a470bc16fdacaadcb855..2605e6dca83290eb59db54618b7bf91ed1e59150 100644
--- a/src/TNL/Devices/Cuda.cu
+++ b/src/TNL/Devices/Cuda.cu
@@ -9,6 +9,7 @@
 /* See Copyright Notice in tnl/Copyright */
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaRuntimeError.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
@@ -106,382 +107,7 @@ bool Cuda::checkDevice( const char* file_name, int line, cudaError error )
    if( error == cudaSuccess )
       return true;
-   std::cerr << "CUDA ERROR(" << error << ") at line " << line << " in " << file_name << ":" << std::endl;
-   std::cerr << cudaGetErrorString( error )  << std::endl;   
-   std::cerr << "Detailed description is: " << std::endl;
-   switch( error )
-   {
-      // 1
-      case cudaErrorMissingConfiguration:
-         std::cerr
-            << "The device function being invoked (usually via ::cudaLaunch()) was not " << std::endl
-            << "previously configured via the ::cudaConfigureCall() function. " << std::endl;
-         break;
-      // 2
-      case cudaErrorMemoryAllocation:
-         std::cerr
-            << "The API call failed because it was unable to allocate enough memory to " << std::endl
-            << "perform the requested operation. " << std::endl;
-         break;
-      // 3
-      case cudaErrorInitializationError:
-         std::cerr
-            << "The API call failed because the CUDA driver and runtime could not be " << std::endl
-            << "initialized. " << std::endl;
-         break;
-      // 4
-      case cudaErrorLaunchFailure:
-         std::cerr
-            << "An exception occurred on the device while executing a kernel. Common " << std::endl
-            << "causes include dereferencing an invalid device pointer and accessing " << std::endl
-            << "out of bounds shared memory. The device cannot be used until " << std::endl
-            << "::cudaThreadExit() is called. All existing device memory allocations " << std::endl
-            << "are invalid and must be reconstructed if the program is to continue " << std::endl
-            << "using CUDA. " << std::endl;
-         break;
-      // 5
-      case cudaErrorPriorLaunchFailure:
-         std::cerr
-            << "This indicated that a previous kernel launch failed. This was previously " << std::endl
-            << "used for device emulation of kernel launches. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Device emulation mode was " << std::endl
-            << "removed with the CUDA 3.1 release. " << std::endl;
-         break;
-      // 6
-      case cudaErrorLaunchTimeout:
-         std::cerr
-            << "This indicates that the device kernel took too long to execute. This can " << std::endl
-            << "only occur if timeouts are enabled - see the device property " << std::endl
-            << "ref ::cudaDeviceProp::kernelExecTimeoutEnabled \"kernelExecTimeoutEnabled\" " << std::endl
-            << "for more information. The device cannot be used until ::cudaThreadExit() " << std::endl
-            << "is called. All existing device memory allocations are invalid and must be " << std::endl
-            << "reconstructed if the program is to continue using CUDA. " << std::endl;
-         break;
-      // 7
-      case cudaErrorLaunchOutOfResources:
-         std::cerr
-            << "This indicates that a launch did not occur because it did not have " << std::endl
-            << "appropriate resources. Although this error is similar to " << std::endl
-            << "::cudaErrorInvalidConfiguration, this error usually indicates that the " << std::endl
-            << "user has attempted to pass too many arguments to the device kernel, or the " << std::endl
-            << "kernel launch specifies too many threads for the kernel's register count. " << std::endl;
-         break;
-      // 8
-      case cudaErrorInvalidDeviceFunction:
-         std::cerr
-            << "The requested device function does not exist or is not compiled for the " << std::endl
-            << "proper device architecture. " << std::endl;
-         break;
-      // 9
-      case cudaErrorInvalidConfiguration:
-         std::cerr
-            << "This indicates that a kernel launch is requesting resources that can " << std::endl
-            << "never be satisfied by the current device. Requesting more shared memory " << std::endl
-            << "per block than the device supports will trigger this error, as will " << std::endl
-            << "requesting too many threads or blocks. See ::cudaDeviceProp for more " << std::endl
-            << "device limitations. " << std::endl;
-         break;
-      // 10
-      case cudaErrorInvalidDevice:
-         std::cerr
-            << "This indicates that the device ordinal supplied by the user does not " << std::endl
-            << "correspond to a valid CUDA device. " << std::endl;
-         break;
-      // 11
-      case cudaErrorInvalidValue:
-         std::cerr
-            << "This indicates that one or more of the parameters passed to the API call " << std::endl
-            << "is not within an acceptable range of values. " << std::endl;
-         break;
-      // 12
-      case cudaErrorInvalidPitchValue:
-         std::cerr
-            << "This indicates that one or more of the pitch-related parameters passed " << std::endl
-            << "to the API call is not within the acceptable range for pitch. " << std::endl;
-         break;
-      // 13
-      case cudaErrorInvalidSymbol:
-         std::cerr
-            << "This indicates that the symbol name/identifier passed to the API call " << std::endl
-            << "is not a valid name or identifier. " << std::endl;
-         break;
-      // 14
-      case cudaErrorMapBufferObjectFailed:
-         std::cerr
-            << "This indicates that the buffer object could not be mapped. " << std::endl;
-         break;
-      // 15
-      case cudaErrorUnmapBufferObjectFailed:
-         std::cerr
-            << "This indicates that the buffer object could not be unmapped. " << std::endl;
-         break;
-      // 16
-      case cudaErrorInvalidHostPointer:
-         std::cerr
-            << "This indicates that at least one host pointer passed to the API call is " << std::endl
-            << "not a valid host pointer. " << std::endl;
-         break;
-      // 17
-      case cudaErrorInvalidDevicePointer:
-         std::cerr
-            << "This indicates that at least one device pointer passed to the API call is " << std::endl
-            << "not a valid device pointer. " << std::endl;
-         break;
-      case cudaErrorInvalidTexture:
-         std::cerr
-            << "This indicates that the texture passed to the API call is not a valid " << std::endl
-            << "texture. " << std::endl;
-         break;
-      case cudaErrorInvalidTextureBinding:
-         std::cerr
-            << "This indicates that the texture binding is not valid. This occurs if you " << std::endl
-            << "call ::cudaGetTextureAlignmentOffset() with an unbound texture. " << std::endl;
-         break;
-      case cudaErrorInvalidChannelDescriptor:
-         std::cerr
-            << "This indicates that the channel descriptor passed to the API call is not " << std::endl
-            << "valid. This occurs if the format is not one of the formats specified by " << std::endl
-            << "::cudaChannelFormatKind, or if one of the dimensions is invalid. " << std::endl;
-         break;
-      case cudaErrorInvalidMemcpyDirection:
-         std::cerr
-            << "This indicates that the direction of the memcpy passed to the API call is " << std::endl
-            << "not one of the types specified by ::cudaMemcpyKind. " << std::endl;
-         break;
-      case cudaErrorAddressOfConstant:
-         std::cerr
-            << "This indicated that the user has taken the address of a constant variable, " << std::endl
-            << "which was forbidden up until the CUDA 3.1 release. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Variables in constant " << std::endl
-            << "memory may now have their address taken by the runtime via " << std::endl
-            << "::cudaGetSymbolAddress(). " << std::endl;
-         break;
-      case cudaErrorTextureFetchFailed:
-         std::cerr
-            << "This indicated that a texture fetch was not able to be performed. " << std::endl
-            << "This was previously used for device emulation of texture operations. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Device emulation mode was " << std::endl
-            << "removed with the CUDA 3.1 release. " << std::endl;
-         break;
-      case cudaErrorTextureNotBound:
-         std::cerr
-            << "This indicated that a texture was not bound for access. " << std::endl
-            << "This was previously used for device emulation of texture operations. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Device emulation mode was " << std::endl
-            << "removed with the CUDA 3.1 release. " << std::endl;
-         break;
-      case cudaErrorSynchronizationError:
-         std::cerr
-            << "This indicated that a synchronization operation had failed. " << std::endl
-            << "This was previously used for some device emulation functions. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Device emulation mode was " << std::endl
-            << "removed with the CUDA 3.1 release. " << std::endl;
-         break;
-      case cudaErrorInvalidFilterSetting:
-         std::cerr
-            << "This indicates that a non-float texture was being accessed with linear " << std::endl
-            << "filtering. This is not supported by CUDA. " << std::endl;
-         break;
-      case cudaErrorInvalidNormSetting:
-         std::cerr
-            << "This indicates that an attempt was made to read a non-float texture as a " << std::endl
-            << "normalized float. This is not supported by CUDA. " << std::endl;
-         break;
-      case cudaErrorMixedDeviceExecution:
-         std::cerr
-            << "Mixing of device and device emulation code was not allowed. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Device emulation mode was " << std::endl
-            << "removed with the CUDA 3.1 release. " << std::endl;
-         break;
-      case cudaErrorCudartUnloading:
-         std::cerr
-            << "This indicated an issue with calling API functions during the unload " << std::endl
-            << "process of the CUDA runtime in prior releases. " << std::endl
-            << "This error return is deprecated as of CUDA 3.2. " << std::endl;
-         break;
-      case cudaErrorUnknown:
-         std::cerr
-            << "This indicates that an unknown internal error has occurred. " << std::endl;
-         break;
-      case cudaErrorNotYetImplemented:
-         std::cerr
-            << "This indicates that the API call is not yet implemented. Production " << std::endl
-            << "releases of CUDA will never return this error. " << std::endl;
-         break;
-      case cudaErrorMemoryValueTooLarge:
-         std::cerr
-            << "This indicated that an emulated device pointer exceeded the 32-bit address " << std::endl
-            << "range. " << std::endl
-            << "This error return is deprecated as of CUDA 3.1. Device emulation mode was " << std::endl
-            << "removed with the CUDA 3.1 release. " << std::endl;
-         break;
-      case cudaErrorInvalidResourceHandle:
-         std::cerr
-            << "This indicates that a resource handle passed to the API call was not " << std::endl
-            << "valid. Resource handles are opaque types like ::cudaStream_t and " << std::endl
-            << "::cudaEvent_t. " << std::endl;
-         break;
-      case cudaErrorNotReady:
-         std::cerr
-            << "This indicates that asynchronous operations issued previously have not " << std::endl
-            << "completed yet. This result is not actually an error, but must be indicated " << std::endl
-            << "differently than ::cudaSuccess (which indicates completion). Calls that " << std::endl
-            << "may return this value include ::cudaEventQuery() and ::cudaStreamQuery(). " << std::endl;
-         break;
-      case cudaErrorInsufficientDriver:
-         std::cerr
-            << "This indicates that the installed NVIDIA CUDA driver is older than the " << std::endl
-            << "CUDA runtime library. This is not a supported configuration. Users should " << std::endl
-            << "install an updated NVIDIA display driver to allow the application to run. " << std::endl;
-         break;
-      case cudaErrorSetOnActiveProcess:
-         std::cerr
-            << "This indicates that the user has called ::cudaSetDevice(), " << std::endl
-            << "::cudaSetValidDevices(), ::cudaSetDeviceFlags(), " << std::endl
-            << "::cudaD3D9SetDirect3DDevice(), ::cudaD3D10SetDirect3DDevice, " << std::endl
-            << "::cudaD3D11SetDirect3DDevice(), * or ::cudaVDPAUSetVDPAUDevice() after " << std::endl
-            << "initializing the CUDA runtime by calling non-device management operations " << std::endl
-            << "(allocating memory and launching kernels are examples of non-device " << std::endl
-            << "management operations). This error can also be returned if using " << std::endl
-            << "runtime/driver interoperability and there is an existing ::CUcontext " << std::endl
-            << "active on the host thread. " << std::endl;
-         break;
-      case cudaErrorInvalidSurface:
-         std::cerr
-            << "This indicates that the surface passed to the API call is not a valid " << std::endl
-            << "surface. " << std::endl;
-         break;
-      case cudaErrorNoDevice:
-         std::cerr
-            << "This indicates that no CUDA-capable devices were detected by the installed " << std::endl
-            << "CUDA driver. " << std::endl;
-         break;
-      case cudaErrorECCUncorrectable:
-         std::cerr
-            << "This indicates that an uncorrectable ECC error was detected during " << std::endl
-            << "execution. " << std::endl;
-         break;
-      case cudaErrorSharedObjectSymbolNotFound:
-         std::cerr
-            << "This indicates that a link to a shared object failed to resolve. " << std::endl;
-         break;
-      case cudaErrorSharedObjectInitFailed:
-         std::cerr
-            << "This indicates that initialization of a shared object failed. " << std::endl;
-         break;
-      case cudaErrorUnsupportedLimit:
-         std::cerr
-            << "This indicates that the ::cudaLimit passed to the API call is not " << std::endl
-            << "supported by the active device. " << std::endl;
-         break;
-      case cudaErrorDuplicateVariableName:
-         std::cerr
-            << "This indicates that multiple global or constant variables (across separate " << std::endl
-            << "CUDA source files in the application) share the same string name. " << std::endl;
-         break;
-      case cudaErrorDuplicateTextureName:
-         std::cerr
-            << "This indicates that multiple textures (across separate CUDA source " << std::endl
-            << "files in the application) share the same string name. " << std::endl;
-         break;
-      case cudaErrorDuplicateSurfaceName:
-         std::cerr
-            << "This indicates that multiple surfaces (across separate CUDA source " << std::endl
-            << "files in the application) share the same string name. " << std::endl;
-         break;
-      case cudaErrorDevicesUnavailable:
-         std::cerr
-            << "This indicates that all CUDA devices are busy or unavailable at the current " << std::endl
-            << "time. Devices are often busy/unavailable due to use of " << std::endl
-            << "::cudaComputeModeExclusive or ::cudaComputeModeProhibited. They can also " << std::endl
-            << "be unavailable due to memory constraints on a device that already has " << std::endl
-            << "active CUDA work being performed. " << std::endl;
-         break;
-      case cudaErrorInvalidKernelImage:
-         std::cerr
-            << "This indicates that the device kernel image is invalid. " << std::endl;
-         break;
-      case cudaErrorNoKernelImageForDevice:
-         std::cerr
-            << "This indicates that there is no kernel image available that is suitable " << std::endl
-            << "for the device. This can occur when a user specifies code generation " << std::endl
-            << "options for a particular CUDA source file that do not include the " << std::endl
-            << "corresponding device configuration. " << std::endl;
-         break;
-      case cudaErrorIncompatibleDriverContext:
-         std::cerr
-            << "This indicates that the current context is not compatible with this " << std::endl
-            << "version of the CUDA Runtime. This can only occur if you are using CUDA " << std::endl
-            << "Runtime/Driver interoperability and have created an existing Driver " << std::endl
-            << "context using an older API. Please see \\ref CUDART_DRIVER " << std::endl
-            << "\"Interactions with the CUDA Driver API\" for more information. " << std::endl;
-         break;
-      case cudaErrorStartupFailure:
-         std::cerr
-            << "This indicates an internal startup failure in the CUDA runtime. " << std::endl;
-         break;
-      case cudaErrorApiFailureBase:
-         std::cerr
-            << "Any unhandled CUDA driver error is added to this value and returned via " << std::endl
-            << "the runtime. Production releases of CUDA should not return such errors. " << std::endl;
-         break;
-      default:
-         std::cerr << "(detailed description is not available)" << std::endl;
-         break;
-   }
-   throw EXIT_FAILURE;
-   return false;
+   throw Exceptions::CudaRuntimeError( error, file_name, line );
 std::ostream& operator << ( std::ostream& str, const dim3& d )
diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h
index 6333b1ab5fb4f695815515cf742d370e0a454dca..805a9c8a35845d2a48a583ecd28fc9559194e676 100644
--- a/src/TNL/Devices/Cuda.h
+++ b/src/TNL/Devices/Cuda.h
@@ -1,5 +1,5 @@
-                          Devices::Cuda.h  -  description
+                          Cuda.h  -  description
     begin                : Nov 7, 2012
     copyright            : (C) 2012 by Tomas Oberhuber
@@ -16,6 +16,7 @@
 #include <TNL/Assert.h>
 #include <TNL/SmartPointersRegister.h>
 #include <TNL/Timer.h>
+#include <TNL/Devices/CudaCallable.h>
 namespace TNL {
@@ -26,12 +27,6 @@ namespace Config {
 namespace Devices {
-#ifdef HAVE_CUDA
-#define __cuda_callable__ __device__ __host__
-#define __cuda_callable__
 class Cuda
@@ -156,7 +151,7 @@ class Cuda
     * I do not know why, but it is more reliable to pass the error code instead
     * of calling cudaGetLastError() inside the method.
-    * We recommend to use macro 'checkCudaDevice' defined bellow.
+    * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow.
    static bool checkDevice( const char* file_name, int line, cudaError error );
@@ -184,14 +179,14 @@ class Cuda
 #ifdef HAVE_CUDA
-#define checkCudaDevice ::TNL::Devices::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() )
-std::ostream& operator << ( std::ostream& str, const dim3& d );
+#define TNL_CHECK_CUDA_DEVICE ::TNL::Devices::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() )
-#define checkCudaDevice ::TNL::Devices::Cuda::checkDevice()
+#define TNL_CHECK_CUDA_DEVICE ::TNL::Devices::Cuda::checkDevice()
-#define CudaSupportMissingMessage \
-   std::cerr << "The CUDA support is missing in the source file " << __FILE__ << " at line " << __LINE__ << ". Please set WITH_CUDA=yes in the install script. " << std::endl;
+#ifdef HAVE_CUDA
+std::ostream& operator << ( std::ostream& str, const dim3& d );
 } // namespace Devices
 } // namespace TNL   
diff --git a/src/TNL/Devices/CudaCallable.h b/src/TNL/Devices/CudaCallable.h
new file mode 100644
index 0000000000000000000000000000000000000000..e0a86a3e4f01a8ae112a45c7528f77e152289b04
--- /dev/null
+++ b/src/TNL/Devices/CudaCallable.h
@@ -0,0 +1,25 @@
+                          CudaCallable.h  -  description
+                             -------------------
+    begin                : Jun 20, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+// The __cuda_callable__ macro has to be in a separate header file to avoid
+// infinite loops by the #include directives.
+// For example, the implementation of Devices::Cuda needs TNL_ASSERT_*
+// macros, which need __cuda_callable__ functions.
+#ifdef HAVE_MIC 
+   #define __cuda_callable__ __attribute__((target(mic)))
+#elif HAVE_CUDA
+   #define __cuda_callable__ __device__ __host__
+   #define __cuda_callable__
diff --git a/src/TNL/Devices/CudaDeviceInfo.cpp b/src/TNL/Devices/CudaDeviceInfo.cpp
index 45199ecda1fa986dc68d0fd5386b06c09e8b7ecc..85a6604d87bba8655aac46609d4ae0db8dc24934 100644
--- a/src/TNL/Devices/CudaDeviceInfo.cpp
+++ b/src/TNL/Devices/CudaDeviceInfo.cpp
@@ -107,6 +107,13 @@ getCudaCores( int deviceNum )
    return 0;
+getRegistersPerMultiprocessor( int deviceNum )
+   return 0;
 writeDeviceInfo( Logger& logger )
diff --git a/src/TNL/Devices/CudaDeviceInfo.cu b/src/TNL/Devices/CudaDeviceInfo.cu
index 84096561dcb11de412828b07e43403132af493f4..56905df9675ef10b07ba40d1876215376ade611c 100644
--- a/src/TNL/Devices/CudaDeviceInfo.cu
+++ b/src/TNL/Devices/CudaDeviceInfo.cu
@@ -135,17 +135,26 @@ getCudaCoresPerMultiprocessors( int deviceNum )
         case 1:   // Tesla generation, G80, G8x, G9x classes
             return 8;
         case 2:   // Fermi generation
-        switch( minor )
-        {
-            case 0:  // GF100 class
-                return 32;
-            case 1:  // GF10x class
-                return 48;
-        }
+            switch( minor )
+            {
+                case 0:  // GF100 class
+                    return 32;
+                case 1:  // GF10x class
+                    return 48;
+            }
         case 3: // Kepler generation -- GK10x, GK11x classes
             return 192;
         case 5: // Maxwell generation -- GM10x, GM20x classes
             return 128;
+        case 6: // Pascal generation
+            switch( minor )
+            {
+                case 0:  // GP100 class
+                    return 64;
+                case 1:  // GP10x classes
+                case 2:
+                    return 128;
+            }
             return -1;
@@ -159,6 +168,21 @@ getCudaCores( int deviceNum )
            CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
+getRegistersPerMultiprocessor( int deviceNum )
+    // results are cached because they are used for configuration of some kernels
+    static std::unordered_map< int, int > results;
+    if( results.count( deviceNum ) == 0 ) {
+        cudaDeviceProp properties;
+        cudaGetDeviceProperties( &properties, deviceNum );
+        results.emplace( deviceNum, properties.regsPerMultiprocessor );
+        return properties.regsPerMultiprocessor;
+    }
+    return results[ deviceNum ];
 writeDeviceInfo( Logger& logger )
diff --git a/src/TNL/Devices/CudaDeviceInfo.h b/src/TNL/Devices/CudaDeviceInfo.h
index b658e917703f8d97a7caca76c8055a845670506d..0b02daee53071d5899f4de14041e40f2da96adaa 100644
--- a/src/TNL/Devices/CudaDeviceInfo.h
+++ b/src/TNL/Devices/CudaDeviceInfo.h
@@ -50,6 +50,8 @@ class CudaDeviceInfo
       static int getCudaCores( int deviceNum );
+      static int getRegistersPerMultiprocessor( int deviceNum );
       static void writeDeviceInfo( Logger& logger );
diff --git a/src/TNL/Devices/Cuda_impl.h b/src/TNL/Devices/Cuda_impl.h
index cea827a03c3dbda07fb0f5b347bbb61ca6067bf1..19bb9db873e3706cfbd9c3de18e9e69bf9bf7527 100644
--- a/src/TNL/Devices/Cuda_impl.h
+++ b/src/TNL/Devices/Cuda_impl.h
@@ -11,6 +11,8 @@
 #pragma once
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaBadAlloc.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 namespace TNL {
 namespace Devices {   
@@ -68,7 +70,6 @@ __device__ inline int Cuda::getGlobalThreadIdx_z( const dim3& gridIdx )
    return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
@@ -79,23 +80,20 @@ ObjectType* Cuda::passToDevice( const ObjectType& object )
    ObjectType* deviceObject;
    if( cudaMalloc( ( void** ) &deviceObject,
                    ( size_t ) sizeof( ObjectType ) ) != cudaSuccess )
-   {
-      checkCudaDevice;
-      return 0;
-   }
+      throw Exceptions::CudaBadAlloc();
    if( cudaMemcpy( ( void* ) deviceObject,
                    ( void* ) &object,
                    sizeof( ObjectType ),
                    cudaMemcpyHostToDevice ) != cudaSuccess )
-      checkCudaDevice;
       cudaFree( ( void* ) deviceObject );
       return 0;
    return deviceObject;
-   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
-   return 0;
+   throw Exceptions::CudaSupportMissing();
@@ -108,26 +106,25 @@ ObjectType Cuda::passFromDevice( const ObjectType* object )
                ( void* ) &object,
                sizeof( ObjectType ),
                cudaMemcpyDeviceToHost );
-   checkCudaDevice;
    return aux;
-   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
-   return 0;
+   throw Exceptions::CudaSupportMissing();
 template< typename ObjectType >
 void Cuda::passFromDevice( const ObjectType* deviceObject,
-                              ObjectType& hostObject )
+                           ObjectType& hostObject )
 #ifdef HAVE_CUDA
    cudaMemcpy( ( void* ) &hostObject,
                ( void* ) deviceObject,
                sizeof( ObjectType ),
                cudaMemcpyDeviceToHost );
-   checkCudaDevice;
-   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
+   throw Exceptions::CudaSupportMissing();
@@ -147,9 +144,9 @@ void Cuda::freeFromDevice( ObjectType* deviceObject )
 #ifdef HAVE_CUDA
    cudaFree( ( void* ) deviceObject );
-   checkCudaDevice;
-   TNL_ASSERT( false, std::cerr << "CUDA support is missing." );
+   throw Exceptions::CudaSupportMissing();
diff --git a/src/TNL/Devices/Host.cpp b/src/TNL/Devices/Host.cpp
index db56392bcb6ad8218e7f3a45f3d1173eae541d94..60f266734b1d6e1d8353bc8767f5b31a17bab48d 100644
--- a/src/TNL/Devices/Host.cpp
+++ b/src/TNL/Devices/Host.cpp
@@ -304,14 +304,22 @@ void Host::configSetup( Config::ConfigDescription& config, const String& prefix
 bool Host::setup( const Config::ParameterContainer& parameters,
                   const String& prefix )
-   if( parameters.getParameter< bool >( prefix + "openmp-enabled" ) )
+   if( parameters.getParameter< bool >( prefix + "openmp-enabled" ) ) {
+      std::cerr << "OpenMP is not supported - please recompile the TNL library with OpenMP." << std::endl;
+      return false;
+   }
-   setMaxThreadsCount( parameters.getParameter< int >( prefix + "openmp-max-threads" ) );
+   const int threadsCount = parameters.getParameter< int >( prefix + "openmp-max-threads" );
+   if( threadsCount > 1 && ! isOMPEnabled() )
+      std::cerr << "Warning: openmp-max-threads was set to " << threadsCount << ", but OpenMP is disabled." << std::endl;
+   setMaxThreadsCount( threadsCount );
    return true;
 } // namespace Devices
 } // namespace TNL
diff --git a/src/TNL/Devices/MIC.cpp b/src/TNL/Devices/MIC.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b16ac7854dd29ced0ed433228f5aa783d1f017ac
--- /dev/null
+++ b/src/TNL/Devices/MIC.cpp
@@ -0,0 +1,41 @@
+                          MIC.cpp  -  description
+                             -------------------
+    begin                : Feb 10, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Vit Hanousek
+#include <TNL/Devices/MIC.h>
+namespace TNL {
+namespace Devices {
+SmartPointersRegister MIC::smartPointersRegister;
+Timer MIC::smartPointersSynchronizationTimer;
+void MIC::insertSmartPointer( SmartPointer* pointer )
+   smartPointersRegister.insert( pointer, -1 );
+void MIC::removeSmartPointer( SmartPointer* pointer )
+   smartPointersRegister.remove( pointer, -1 );
+bool MIC::synchronizeDevice( int deviceId )
+   smartPointersSynchronizationTimer.start();
+   bool b = smartPointersRegister.synchronizeDevice( deviceId );
+   smartPointersSynchronizationTimer.stop();
+   return b;
+} // namespace Devices
+} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Devices/MIC.h b/src/TNL/Devices/MIC.h
new file mode 100644
index 0000000000000000000000000000000000000000..36678c0d27da1c873ee4bf0da2e71616c012d2bc
--- /dev/null
+++ b/src/TNL/Devices/MIC.h
@@ -0,0 +1,179 @@
+                          MIC.h  -  description
+                          -------------------
+    begin                : Nov 7, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Vit Hanousek
+#pragma once
+#include <iostream>
+#include <cstring>
+#include <unistd.h>
+#include <TNL/String.h>
+#include <TNL/Assert.h>
+#include <TNL/SmartPointersRegister.h>
+#include <TNL/Timer.h>
+#include <TNL/Devices/CudaCallable.h>
+namespace TNL {
+namespace Devices {
+//useful macros from Intel's tutorials -- but we do not use it, becaouse it is tricky (system of maping variables CPU-MIC)
+#define ALLOC alloc_if(1) //alloac variable at begining of offloaded block -- default
+#define FREE free_if(1) // delete variable at the end of offloaded block -- default
+#define RETAIN free_if(0) //do not delete variable at the end of offladed block  
+#define REUSE alloc_if(0) //do not alloc variable at begin of offloaded block, reuse variable on MIC which was not deleted befeore
+//structure which hides pointer - bypass mapping of variables and addresses of arrays and allow get RAW addres of MIC memory to RAM
+template< typename Type >
+struct MICHider{
+    Type *pointer;
+//inflatable structure -- structures can be copied to MIC - classes not (viz paper published after CSJP 2016 in Krakow)
+//object can be copied in side this structure and then copied into MIC memory
+template <unsigned int VELIKOST>
+struct MICStruct{
+	uint8_t data[VELIKOST];
+//Macros which can make code better readeble --but they are tricky, creating variables with specific names...
+//version using inflatable structure
+#define TNLMICSTRUCT(bb,typ) Devices::MICStruct<sizeof(typ)> s ## bb; \
+                             memcpy((void*)& s ## bb,(void*)& bb,sizeof(typ));
+#define TNLMICSTRUCTOFF(bb,typ) s ## bb
+#define TNLMICSTRUCTUSE(bb,typ) typ * kernel ## bb = (typ*) &s ## bb;
+#define TNLMICSTRUCTALLOC(bb,typ) typ * kernel ## bb = (typ*) malloc (sizeof(typ)); \
+                                memcpy((void*)kernel ## bb,(void*) & s ## bb, sizeof(typ));
+//version which retypes pointer of object to pointer to array of uint8_t, 
+//object can be copied using uint8_t pointer as array with same length as object size
+#define TNLMICHIDE(bb,typ) uint8_t * u ## bb=(uint8_t *)&bb; \
+                           MICHider<typ> kernel ## bb;
+#define TNLMICHIDEALLOCOFF(bb,typ) in(u ## bb:length(sizeof(typ))) out(kernel ## bb)
+#define TNLMICHIDEALLOC(bb,typ) kernel ## bb.pointer=(typ*)malloc(sizeof(typ)); \
+                                memcpy((void*)kernel ## bb.pointer,(void*)u ## bb,sizeof(typ));
+#define TNLMICHIDEFREEOFF(bb,typ) in(kernel ## bb)
+#define TNLMICHIDEFREE(bb,typ) free((void*)kernel ## bb.pointer
+class MIC
+   public:
+        static String getDeviceType()
+        {
+            return String( "MIC" );
+        };
+#ifdef HAVE_MIC  
+       //useful debuging -- but produce warning
+       __cuda_callable__ static inline void CheckMIC(void)
+       {
+            #ifdef __MIC__
+                    std::cout<<"ON MIC"<<std::endl;
+            #else
+                    std::cout<<"ON CPU" <<std::endl;
+            #endif
+        };
+        //old copying funciton  -- deprecated
+        template <typename TYP>
+        static
+        TYP * passToDevice(TYP &objektCPU)
+        {
+                uint8_t * uk=(uint8_t *)&objektCPU; 
+                MICHider<TYP> ret;
+                #pragma offload target(mic) in(uk:length(sizeof(TYP))) out(ret)
+                {
+                    ret.pointer=(TYP*)malloc(sizeof(TYP));
+                    std::memcpy((void*)ret.pointer,(void*)uk,sizeof(TYP));
+                }
+                return ret.pointer;
+                std::cout << "Někdo mně volá :-D" <<std::endl;
+        };
+        //old cleaning function -- deprecated
+        template <typename TYP>
+        static
+        void freeFromDevice(TYP *objektMIC)
+        {
+            MICHider<TYP> ptr;
+            ptr.pointer=objektMIC;
+            #pragma offload target(mic) in(ptr)
+            {
+                free((void*)ptr.pointer);
+            }
+        };
+        static inline
+        void CopyToMIC(void* mic_ptr,void* ptr,size_t size)
+        {
+            uint8_t image[size];
+            std::memcpy((void*)&image,ptr,size);
+            Devices::MICHider<void> hide_ptr;
+            hide_ptr.pointer=mic_ptr;
+            #pragma offload target(mic) in(hide_ptr) in(image) in(size)
+            {
+                std::memcpy((void*)hide_ptr.pointer,(void*)&image,size);
+            }
+        };
+        static inline
+        void* AllocMIC(size_t size)
+        {
+            Devices::MICHider<void> hide_ptr;
+            #pragma offload target(mic) out(hide_ptr) in(size)
+            {
+                hide_ptr.pointer=malloc(size);
+            }
+            return hide_ptr.pointer;
+        };
+        static inline
+        void FreeMIC(void* ptr)
+        {
+                Devices::MICHider<void> hide_ptr;
+                hide_ptr.pointer=ptr;
+                #pragma offload target(mic) in(hide_ptr)
+                {
+                        free(hide_ptr.pointer);
+                }
+        };
+   static void insertSmartPointer( SmartPointer* pointer );
+   static void removeSmartPointer( SmartPointer* pointer );
+   // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be
+   // called to get the device ID.
+   static bool synchronizeDevice( int deviceId = -1 );
+   static Timer smartPointersSynchronizationTimer;
+   protected:
+   static SmartPointersRegister smartPointersRegister;
+}//namespace Devices
+}//namespace TNL
diff --git a/src/TNL/Exceptions/CMakeLists.txt b/src/TNL/Exceptions/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..856f8c2fc73fd86f8e1530392bcb158dbaca5568
--- /dev/null
+++ b/src/TNL/Exceptions/CMakeLists.txt
@@ -0,0 +1,7 @@
+SET( headers CudaBadAlloc.h
+             CudaRuntimeError.h
+             CudaSupportMissing.h
+             MICBadAlloc.h
+             MICSupportMissing.h )
+INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/TNL/Exceptions )
diff --git a/src/TNL/Exceptions/CudaBadAlloc.h b/src/TNL/Exceptions/CudaBadAlloc.h
new file mode 100644
index 0000000000000000000000000000000000000000..4dc78c488e65fe6e57fcfb8d5672d2554f2262b9
--- /dev/null
+++ b/src/TNL/Exceptions/CudaBadAlloc.h
@@ -0,0 +1,40 @@
+                          CudaBadAlloc.h  -  description
+                             -------------------
+    begin                : Jun 18, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include <new>
+namespace TNL {
+namespace Exceptions {
+struct CudaBadAlloc
+   : public std::bad_alloc
+   CudaBadAlloc()
+   {
+#ifdef HAVE_CUDA
+      // Make sure to clear the CUDA error, otherwise the exception handler
+      // might throw another exception with the same error.
+      cudaGetLastError();
+   }
+   const char* what() const throw()
+   {
+      return "Failed to allocate memory on the CUDA device: "
+             "most likely there is not enough space on the device memory.";
+   }
+} // namespace Exceptions
+} // namespace TNL
diff --git a/src/TNL/Exceptions/CudaRuntimeError.h b/src/TNL/Exceptions/CudaRuntimeError.h
new file mode 100644
index 0000000000000000000000000000000000000000..a6773c23c03ac0ab97ef5e9b6ecc8884ac324255
--- /dev/null
+++ b/src/TNL/Exceptions/CudaRuntimeError.h
@@ -0,0 +1,77 @@
+                          CudaRuntimeError.h  -  description
+                             -------------------
+    begin                : Jun 18, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include "CudaSupportMissing.h"
+namespace TNL {
+namespace Exceptions {
+#ifdef HAVE_CUDA
+   using CudaStatusType = cudaError;
+   using CudaStatusType = int;
+class CudaRuntimeError
+   : public std::runtime_error
+   CudaRuntimeError( CudaStatusType error_code )
+   : std::runtime_error( "CUDA ERROR " + std::to_string( (int) error_code ) + " (" + name( error_code ) + "): "
+                         + description( error_code ) + "." ),
+     code_( error_code )
+   {}
+   CudaRuntimeError( CudaStatusType error_code, const std::string& what_arg )
+   : std::runtime_error( "CUDA ERROR " + std::to_string( (int) error_code ) + " (" + name( error_code ) + "): "
+                         + description( error_code ) + ".\nDetails: " + what_arg ),
+     code_(error_code)
+   {}
+   CudaRuntimeError( CudaStatusType error_code, const char* file_name, int line )
+   : std::runtime_error( "CUDA ERROR " + std::to_string( (int) error_code ) + " (" + name( error_code ) + "): "
+                         + description( error_code ) + ".\nSource: line " + std::to_string( line )
+                         + " in " + file_name + ": " + description( error_code ) ),
+     code_(error_code)
+   {}
+   CudaStatusType code() const
+   {
+      return code_;
+   }
+   static std::string name( CudaStatusType error_code )
+   {
+#ifdef HAVE_CUDA
+      return cudaGetErrorName( error_code );
+      throw CudaSupportMissing();
+   }
+   static std::string description( CudaStatusType error_code )
+   {
+#ifdef HAVE_CUDA
+      return cudaGetErrorString( error_code );
+      throw CudaSupportMissing();
+   }
+   CudaStatusType code_;
+} // namespace Exceptions
+} // namespace TNL
diff --git a/src/TNL/Exceptions/CudaSupportMissing.h b/src/TNL/Exceptions/CudaSupportMissing.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a7f978a122a0c0f53f82a06d3dc03bbde40fd36
--- /dev/null
+++ b/src/TNL/Exceptions/CudaSupportMissing.h
@@ -0,0 +1,30 @@
+                          CudaSupportMissing.h  -  description
+                             -------------------
+    begin                : Jun 18, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include <stdexcept>
+namespace TNL {
+namespace Exceptions {
+struct CudaSupportMissing
+   : public std::runtime_error
+   CudaSupportMissing()
+   : std::runtime_error( "CUDA support is missing, but the program called a function which needs it. "
+                         "Please recompile the program with CUDA support." )
+   {}
+} // namespace Exceptions
+} // namespace TNL
diff --git a/src/TNL/Exceptions/MICBadAlloc.h b/src/TNL/Exceptions/MICBadAlloc.h
new file mode 100644
index 0000000000000000000000000000000000000000..b8f3a9157c54d8155652a42a700ad71a221aa201
--- /dev/null
+++ b/src/TNL/Exceptions/MICBadAlloc.h
@@ -0,0 +1,31 @@
+                          MICBadAlloc.h  -  description
+                             -------------------
+    begin                : Jul 31, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include <new>
+namespace TNL {
+namespace Exceptions {
+struct MICBadAlloc
+   : public std::bad_alloc
+   const char* what() const throw()
+   {
+      return "Failed to allocate memory on the MIC device: "
+             "most likely there is not enough space on the device memory.";
+   }
+} // namespace Exceptions
+} // namespace TNL
diff --git a/src/TNL/Exceptions/MICSupportMissing.h b/src/TNL/Exceptions/MICSupportMissing.h
new file mode 100644
index 0000000000000000000000000000000000000000..6d4260e6addbbb9dd89a7c9d5a07833485c6a0c2
--- /dev/null
+++ b/src/TNL/Exceptions/MICSupportMissing.h
@@ -0,0 +1,30 @@
+                          MICSupportMissing.h  -  description
+                             -------------------
+    begin                : Jul 31, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include <stdexcept>
+namespace TNL {
+namespace Exceptions {
+struct MICSupportMissing
+   : public std::runtime_error
+   MICSupportMissing()
+   : std::runtime_error( "MIC support is missing, but the program called a function which needs it. "
+                         "Please recompile the program with MIC support." )
+   {}
+} // namespace Exceptions
+} // namespace TNL
diff --git a/src/TNL/Experimental/Arithmetics/CMakeLists.txt b/src/TNL/Experimental/Arithmetics/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Arithmetics/Real.h b/src/TNL/Experimental/Arithmetics/Real.h
index 96d8369ef62d3211cc8182833569c85e7e7b6a44..7f0943147705757f3eb0c745b6e0025dbd763614 100644
--- a/src/TNL/Experimental/Arithmetics/Real.h
+++ b/src/TNL/Experimental/Arithmetics/Real.h
@@ -11,7 +11,7 @@
 #pragma once
 #include <iostream>
-#include <math.h>
+#include <cmath>
 #include <TNL/Experimental/Arithmetics/FlopsCounter.h>
 namespace TNL {
@@ -313,105 +313,105 @@ template< class T > const Real< T > fabs( const Real< T >& v )
 template< class T > const Real< T > sqrt( const Real< T >& v )
    tnl_flops_counter. recordFunction();
-   return Real< T >( ::sqrt( v. Data() ) );
+   return Real< T >( std::sqrt( v. Data() ) );
 template< class T > const Real< T > pow( const Real< T >& x, const Real< T >& exp )
    tnl_flops_counter. recordFunction();
-   return Real< T >( ::pow( x. Data(), exp. Data() ) );
+   return Real< T >( std::pow( x. Data(), exp. Data() ) );
 template< class T > const Real< T > pow( const Real< T >& x, const T& exp )
    tnl_flops_counter. recordFunction();
-   return Real< T >( ::pow( x. Data(), exp ) );
+   return Real< T >( std::pow( x. Data(), exp ) );
 template< class T > const Real< T > cos( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( ::cos( x. Data() ) );
+   return Real< T >( std::cos( x. Data() ) );
 template< class T > const Real< T > sin( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( ::sin( x. Data() ) );
+   return Real< T >( std::sin( x. Data() ) );
 template< class T > const Real< T > tan( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( tan( x. Data() ) );
+   return Real< T >( std::tan( x. Data() ) );
 template< class T > const Real< T > acos( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( acos( x. Data() ) );
+   return Real< T >( std::acos( x. Data() ) );
 template< class T > const Real< T > asin( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( asin( x. Data() ) );
+   return Real< T >( std::asin( x. Data() ) );
 template< class T > const Real< T > atan( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( atan( x. Data() ) );
+   return Real< T >( std::atan( x. Data() ) );
 template< class T > const Real< T > atan2( const Real< T >& x, const Real< T >& exp )
    tnl_flops_counter. recordFunction();
-   return Real< T >( atan2( x. Data(), exp. Data() ) );
+   return Real< T >( std::atan2( x. Data(), exp. Data() ) );
 template< class T > const Real< T > atan2( const Real< T >& x, const T& exp )
    tnl_flops_counter. recordFunction();
-   return Real< T >( atan2( x. Data(), exp ) );
+   return Real< T >( std::atan2( x. Data(), exp ) );
 template< class T > const Real< T > cosh( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( cosh( x. Data() ) );
+   return Real< T >( std::cosh( x. Data() ) );
 template< class T > const Real< T > sinh( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( sinh( x. Data() ) );
+   return Real< T >( std::sinh( x. Data() ) );
 template< class T > const Real< T > tanh( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( ::tanh( x. Data() ) );
+   return Real< T >( std::tanh( x. Data() ) );
 template< class T > const Real< T > exp( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( exp( x. Data() ) );
+   return Real< T >( std::exp( x. Data() ) );
 template< class T > const Real< T > log( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( log( x. Data() ) );
+   return Real< T >( std::log( x. Data() ) );
 template< class T > const Real< T > log10( const Real< T >& x )
    tnl_flops_counter. recordFunction();
-   return Real< T >( log10( x. Data() ) );
+   return Real< T >( std::log10( x. Data() ) );
 template< class T >
diff --git a/src/TNL/Experimental/CMakeLists.txt b/src/TNL/Experimental/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Operators/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/CMakeLists.txt
index 202550179b61cbf4a47bca43f96f751d0e2510eb..115fd1c2cde483c4fbfe64951210d69827976d2b 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/CMakeLists.txt
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/CMakeLists.txt
@@ -15,4 +15,4 @@ set( tnl_implementation_operators_godunov_SOURCES
-INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/operators/godunov )
+INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/TNL/Operators/Godunov/ )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/godunovEikonal.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/godunovEikonal.h
index a90c7258438a52c0afebf2d0e9838e1491f16a13..9cebf0ea9a2608526cff1dcf1290e41fa4f7b083 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/godunovEikonal.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Eikonal/godunovEikonal.h
@@ -37,7 +37,7 @@ class godunovEikonalScheme< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Inde
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 1, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
       static String getType();
@@ -49,11 +49,11 @@ class godunovEikonalScheme< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Inde
                        const RealType& f ) const
          const RealType& hx_inv = entity.getMesh().template getSpaceStepsProducts< -1 >();
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const RealType& u_c = u[ entity.getIndex() ];
-         const RealType& u_e = u[ neighbourEntities.template getEntityIndex< 1 >() ];
-         const RealType& u_w = u[ neighbourEntities.template getEntityIndex< -1 >() ];
+         const RealType& u_e = u[ neighborEntities.template getEntityIndex< 1 >() ];
+         const RealType& u_w = u[ neighborEntities.template getEntityIndex< -1 >() ];
          if( f > 0.0 )
@@ -99,7 +99,7 @@ class godunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Inde
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 2, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -114,12 +114,12 @@ class godunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Inde
          const RealType& hx_inv = entity.getMesh().template getSpaceStepsProducts< -1,  0 >();
          const RealType& hy_inv = entity.getMesh().template getSpaceStepsProducts<  0, -1 >();
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();   
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();   
          const RealType& u_c = u[ entity.getIndex() ];
-         const RealType& u_e = u[ neighbourEntities.template getEntityIndex<  1,  0 >() ];
-         const RealType& u_w = u[ neighbourEntities.template getEntityIndex< -1,  0 >() ];
-         const RealType& u_n = u[ neighbourEntities.template getEntityIndex<  0,  1 >() ];
-         const RealType& u_s = u[ neighbourEntities.template getEntityIndex<  0, -1 >() ];
+         const RealType& u_e = u[ neighborEntities.template getEntityIndex<  1,  0 >() ];
+         const RealType& u_w = u[ neighborEntities.template getEntityIndex< -1,  0 >() ];
+         const RealType& u_n = u[ neighborEntities.template getEntityIndex<  0,  1 >() ];
+         const RealType& u_s = u[ neighborEntities.template getEntityIndex<  0, -1 >() ];
          if( f > 0.0 )
@@ -180,7 +180,7 @@ class godunovEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 3, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;   
       static String getType();
@@ -195,14 +195,14 @@ class godunovEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Inde
          const RealType& hy_inv = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >();
          const RealType& hz_inv = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >();
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const RealType& u_c = u[ entity.getIndex() ];
-         const RealType& u_e = u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ];
-         const RealType& u_w = u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ];
-         const RealType& u_n = u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ];
-         const RealType& u_s = u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ];
-         const RealType& u_t = u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ];
-         const RealType& u_b = u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ];
+         const RealType& u_e = u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ];
+         const RealType& u_w = u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ];
+         const RealType& u_n = u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ];
+         const RealType& u_s = u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ];
+         const RealType& u_t = u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ];
+         const RealType& u_b = u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ];
          if( f > 0.0 )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/CMakeLists.txt
old mode 100755
new mode 100644
index 8b74f755ed0a74e7001f1c155c60f61c650679d2..bb9c4e3b4fb36d9e9982f8abcead16a1e5dfdb16
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/CMakeLists.txt
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/CMakeLists.txt
@@ -21,4 +21,4 @@ set( tnl_implementation_operators_godunov-eikonal_SOURCES
-INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/operators/godunov-eikonal )
+INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/TNL/Operators/Godunov-Eikonal/ )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/godunovEikonal.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/godunovEikonal.h
index d4941ba38a992fd76047a7ead2f538b79af2f079..fcff6b36aa023b548972117521b33b772029a838 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/godunovEikonal.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/godunovEikonal.h
@@ -20,8 +20,7 @@
 #include <solvers/preconditioners/tnlDummyPreconditioner.h>
 #include <solvers/tnlSolverMonitor.h>
 #include <core/tnlLogger.h>
-#include <core/vectors/tnlVector.h>
-#include <core/vectors/tnlSharedVector.h>
+#include <TNL/Containers/Vector.h>
 #include <core/mfilename.h>
 #include <mesh/tnlGrid.h>
@@ -49,7 +48,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 1, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -98,7 +97,7 @@ class godunovEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Inde
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 2, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
       static String getType();
@@ -143,7 +142,7 @@ class godunovEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Inde
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 3, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal.h
index e2f1859231a8096ae38e0dcaff664f490efa5cfc..f462245848757f6ccf9b9349ac4baa38cb613e4b 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal.h
@@ -21,8 +21,7 @@
 #include <solvers/preconditioners/tnlDummyPreconditioner.h>
 #include <solvers/tnlSolverMonitor.h>
 #include <core/tnlLogger.h>
-#include <core/vectors/tnlVector.h>
-#include <core/vectors/tnlSharedVector.h>
+#include <TNL/Containers/Vector.h>
 #include <core/mfilename.h>
 #include <mesh/tnlGrid.h>
 #include <core/tnlCuda.h>
@@ -52,7 +51,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 1, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -121,7 +120,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
 #ifdef HAVE_CUDA
    __device__ __host__
@@ -150,7 +149,7 @@ public:
                    const Vector& u,
                    const RealType& time,
                    const IndexType boundaryCondition,
-                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities) const;
+                   const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities) const;
  #ifdef HAVE_CUDA
@@ -161,7 +160,7 @@ public:
                    const RealType* u,
                    const RealType& time,
                    const IndexType boundaryCondition,
-                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities) const;
+                   const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities) const;
 #ifdef HAVE_CUDA
    __device__ __host__
@@ -197,7 +196,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
 #ifdef HAVE_CUDA
@@ -227,7 +226,7 @@ public:
                    const Vector& u,
                    const RealType& time,
                    const IndexType boundaryCondition,
-  	  	  	       const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities  ) const;
+  	  	  	       const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities  ) const;
 #ifdef HAVE_CUDA
@@ -238,7 +237,7 @@ public:
                   const RealType* u,
                   const RealType& time,
                   const IndexType boundaryCondition,
- 	  	  	      const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities ) const;
+ 	  	  	      const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities ) const;
 #ifdef HAVE_CUDA
    __device__ __host__
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal2D_impl.h
index 29738ab330e3b951d6b8a9bd6be89b5606c0f5b0..5ba9e3f781b5078f71acee19cd754364bca32b50 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal2D_impl.h
@@ -145,7 +145,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Vector& u,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real& time,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const IndexType boundaryCondition,
-          	  	  	  	  	  	  	  	  	  	                     const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities ) const
+          	  	  	  	  	  	  	  	  	  	                     const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities ) const
@@ -182,24 +182,24 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
 	   if(coordinates.x() == mesh.getDimensions().x() - 1)
-		   xf += u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< -1,  0 >()];
-		   xf += u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< 1,  0 >()];
 	   if(coordinates.x() == 0)
-		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< 1,  0 >()];
-		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< -1,  0 >()];
 	   if(coordinates.y() == mesh.getDimensions().y() - 1)
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  1 >()];
 	   if(coordinates.y() == 0)
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  1 >()];
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  -1 >()];
 	   //xb *= ihx;
@@ -269,9 +269,9 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
 //		   else *//*if(boundaryCondition & 4)
 //			   xf = 0.0;
 //		   else /**/if(coordinates.x() == mesh.getDimensions().x() - 1)
-//			   xf = negativePart((u[neighbourEntities.template getEntityIndex< -1,  0 >()] - u[cellIndex])*ihx);
+//			   xf = negativePart((u[neighborEntities.template getEntityIndex< -1,  0 >()] - u[cellIndex])*ihx);
 //		   else
-//			   xf = negativePart((u[neighbourEntities.template getEntityIndex< 1,  0 >()] - u[cellIndex])*ihx);
+//			   xf = negativePart((u[neighborEntities.template getEntityIndex< 1,  0 >()] - u[cellIndex])*ihx);
 //	/**/ /*  if(boundaryCondition & 4)
 //			   xb = (u[cellIndex] - u[mesh.getCellXPredecessor( cellIndex )])*ihx;
@@ -280,25 +280,25 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
 //		   else /**/if(coordinates.x() == 0)
 //			   xb = positivePart((u[cellIndex] - u[mesh.template getCellNextToCell<+1,0>( cellIndex )])*ihx);
 //		   else
-//			   xb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< -1,  0 >()])*ihx);
+//			   xb = positivePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< -1,  0 >()])*ihx);
 //	/**/  /* if(boundaryCondition & 1)
 //			   yf = (u[mesh.getCellYSuccessor( cellIndex )] - u[cellIndex])*ihy;
 //		   else *//*if(boundaryCondition & 8)
 //			   yf = 0.0;
 //		   else /**/if(coordinates.y() == mesh.getDimensions().y() - 1)
-//			   yf = negativePart((u[neighbourEntities.template getEntityIndex< 0,  -1 >()] - u[cellIndex])*ihy);
+//			   yf = negativePart((u[neighborEntities.template getEntityIndex< 0,  -1 >()] - u[cellIndex])*ihy);
 //		   else
-//			   yf = negativePart((u[neighbourEntities.template getEntityIndex< 0,  1 >()] - u[cellIndex])*ihy);
+//			   yf = negativePart((u[neighborEntities.template getEntityIndex< 0,  1 >()] - u[cellIndex])*ihy);
 //	/**/  /* if(boundaryCondition & 8)
 //			   yb = (u[cellIndex] - u[mesh.getCellYPredecessor( cellIndex )])*ihy;
 //		   else *//*if(boundaryCondition & 1)
 //			   yb = 0.0;
 //		   else /**/if(coordinates.y() == 0)
-//			   yb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  1 >()])*ihy);
+//			   yb = positivePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< 0,  1 >()])*ihy);
 //		   else
-//			   yb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  -1 >()])*ihy);
+//			   yb = positivePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< 0,  -1 >()])*ihy);
 //		   if(xb - xf > 0.0)
 //			   xf = 0.0;
@@ -323,36 +323,36 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
 //		   else*//* if(boundaryCondition & 4)
 //			   xf = 0.0;
 //		   else /**/if(coordinates.x() == mesh.getDimensions().x() - 1)
-//			   xf = positivePart((u[neighbourEntities.template getEntityIndex< -1,  0 >()] - u[cellIndex])*ihx);
+//			   xf = positivePart((u[neighborEntities.template getEntityIndex< -1,  0 >()] - u[cellIndex])*ihx);
 //		   else
-//			   xf = positivePart((u[neighbourEntities.template getEntityIndex< 1,  0 >()] - u[cellIndex])*ihx);
+//			   xf = positivePart((u[neighborEntities.template getEntityIndex< 1,  0 >()] - u[cellIndex])*ihx);
 //	/**/  /* if(boundaryCondition & 4)
 //			   xb = (u[cellIndex] - u[mesh.getCellXPredecessor( cellIndex )])*ihx;
 //		   else*//* if(boundaryCondition & 2)
 //			   xb = 0.0;
 //		   else /**/if(coordinates.x() == 0)
-//			   xb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 1,  0 >()])*ihx);
+//			   xb = negativePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< 1,  0 >()])*ihx);
 //		   else
-//			   xb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< -1,  0 >()])*ihx);
+//			   xb = negativePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< -1,  0 >()])*ihx);
 //	/**/ /*  if(boundaryCondition & 1)
 //			   yf = (u[mesh.getCellYSuccessor( cellIndex )] - u[cellIndex])*ihy;
 //		   else *//*if(boundaryCondition & 8)
 //			   yf = 0.0;
 //		   else /**/if(coordinates.y() == mesh.getDimensions().y() - 1)
-//			   yf = positivePart((u[neighbourEntities.template getEntityIndex< 0,  -1 >()] - u[cellIndex])*ihy);
+//			   yf = positivePart((u[neighborEntities.template getEntityIndex< 0,  -1 >()] - u[cellIndex])*ihy);
 //		   else
-//			   yf = positivePart((u[neighbourEntities.template getEntityIndex< 0,  1 >()] - u[cellIndex])*ihy);
+//			   yf = positivePart((u[neighborEntities.template getEntityIndex< 0,  1 >()] - u[cellIndex])*ihy);
 //	/**/  /* if(boundaryCondition & 8)
 //			   yb = (u[cellIndex] - u[mesh.getCellYPredecessor( cellIndex )])*ihy;
 //		   else*//* if(boundaryCondition & 1)
 //			   yb = 0.0;
 //		   else /**/if(coordinates.y() == 0)
-//			   yb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  1 >()])*ihy);
+//			   yb = negativePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< 0,  1 >()])*ihy);
 //		   else
-//			   yb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  -1 >()])*ihy);
+//			   yb = negativePart((u[cellIndex] - u[neighborEntities.template getEntityIndex< 0,  -1 >()])*ihy);
 //		   if(xb - xf > 0.0)
@@ -401,7 +401,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real* u,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real& time,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const IndexType boundaryCondition,
-          	  	  	  	  	  	  	  	  	  	                     const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities) const
+          	  	  	  	  	  	  	  	  	  	                     const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities) const
 	RealType signui;
@@ -418,24 +418,24 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
 	   if(coordinates.x() == mesh.getDimensions().x() - 1)
-		   xf += u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< -1,  0 >()];
-		   xf += u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< 1,  0 >()];
 	   if(coordinates.x() == 0)
-		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< 1,  0 >()];
-		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< -1,  0 >()];
 	   if(coordinates.y() == mesh.getDimensions().y() - 1)
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  1 >()];
 	   if(coordinates.y() == 0)
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  1 >()];
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  -1 >()];
 	   if(signui > 0.0)
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal3D_impl.h
index 3aa8aeed5357e7a4f0b0f176e9002c89a4295a84..a352a07375a6457da3cff77a7f89dde88c94e8f7 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovEikonal3D_impl.h
@@ -127,7 +127,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Vector& u,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real& time,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const IndexType boundaryCondition,
-          	  	          	  	  	  	  	  	  	  	  	  	                     const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities  ) const
+          	  	          	  	  	  	  	  	  	  	  	  	                     const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities  ) const
 	if ( ((coordinates.x() == 0 && (boundaryCondition & 4)) or
 		 (coordinates.x() == mesh.getDimensions().x() - 1 && (boundaryCondition & 2)) or
@@ -158,35 +158,35 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
 	   if(coordinates.x() == mesh.getDimensions().x() - 1)
-		   xf += u[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-		   xf += u[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
 	   if(coordinates.x() == 0)
-		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
-		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
 	   if(coordinates.y() == mesh.getDimensions().y() - 1)
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
 	   if(coordinates.y() == 0)
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
 	   if(coordinates.z() == mesh.getDimensions().z() - 1)
-		   zf += u[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+		   zf += u[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
-		   zf += u[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+		   zf += u[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
 	   if(coordinates.z() == 0)
-		   zb -= u[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+		   zb -= u[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
-		   zb -= u[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+		   zb -= u[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
 	   if(signui > 0.0)
@@ -262,7 +262,7 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real* u,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real& time,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const IndexType boundaryCondition,
-          	  	          	  	  	  	  	  	  	  	  	  	                     const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities
+          	  	          	  	  	  	  	  	  	  	  	  	                     const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 ) const
 	RealType signui;
@@ -279,35 +279,35 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
 		   if(coordinates.x() == mesh.getDimensions().x() - 1)
-			   xf += u[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+			   xf += u[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-			   xf += u[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+			   xf += u[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
 		   if(coordinates.x() == 0)
-			   xb -= u[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+			   xb -= u[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
-			   xb -= u[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+			   xb -= u[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
 		   if(coordinates.y() == mesh.getDimensions().y() - 1)
-			   yf += u[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+			   yf += u[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
-			   yf += u[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+			   yf += u[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
 		   if(coordinates.y() == 0)
-			   yb -= u[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+			   yb -= u[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
-			   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+			   yb -= u[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
 		   if(coordinates.z() == mesh.getDimensions().z() - 1)
-			   zf += u[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+			   zf += u[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
-			   zf += u[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+			   zf += u[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
 		   if(coordinates.z() == 0)
-			   zb -= u[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+			   zb -= u[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
-			   zb -= u[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+			   zb -= u[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
 		   if(signui > 0.0)
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap.h
index db7a087224b933150fcda1ed47119d2e23448000..deeff7c08850c41319ea2bfbfd55fa8d83cb9a9d 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap.h
@@ -21,8 +21,7 @@
 #include <solvers/preconditioners/tnlDummyPreconditioner.h>
 #include <solvers/tnlSolverMonitor.h>
 #include <core/tnlLogger.h>
-#include <core/vectors/tnlVector.h>
-#include <core/vectors/tnlSharedVector.h>
+#include <TNL/Containers/Vector.h>
 #include <core/mfilename.h>
 #include <mesh/tnlGrid.h>
 #include <core/tnlCuda.h>
@@ -52,7 +51,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 1, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -121,7 +120,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
 #ifdef HAVE_CUDA
    __device__ __host__
@@ -150,7 +149,7 @@ public:
                    const Vector& u,
                    const RealType& time,
                    const IndexType boundaryCondition,
-                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities,
+                   const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities,
                    const Vector& map) const;
  #ifdef HAVE_CUDA
@@ -162,7 +161,7 @@ public:
                    const RealType* u,
                    const RealType& time,
                    const IndexType boundaryCondition,
-                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities,
+                   const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities,
   	  	  	       const RealType* map) const;
 #ifdef HAVE_CUDA
    __device__ __host__
@@ -199,7 +198,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
 #ifdef HAVE_CUDA
@@ -229,7 +228,7 @@ public:
                    const Vector& u,
                    const RealType& time,
                    const IndexType boundaryCondition,
-  	  	  	       const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities  ) const;
+  	  	  	       const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities  ) const;
 #ifdef HAVE_CUDA
@@ -240,7 +239,7 @@ public:
                   const RealType* u,
                   const RealType& time,
                   const IndexType boundaryCondition,
- 	  	  	      const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities) const;
+ 	  	  	      const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities) const;
 #ifdef HAVE_CUDA
    __device__ __host__
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap2D_impl.h
index c5f7d613497e7f61b449a2e16190c18988a12c3f..9b3117cf25a8d039002db1c6ba7328ea0870d492 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov-Eikonal/parallelGodunovMap2D_impl.h
@@ -145,7 +145,7 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Vector& u,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real& time,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const IndexType boundaryCondition,
-          	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities,
+          	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Vector& map) const
@@ -190,24 +190,24 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real,
 	   if(coordinates.x() == mesh.getDimensions().x() - 1)
-		   xf += u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< -1,  0 >()];
-		   xf += u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< 1,  0 >()];
 	   if(coordinates.x() == 0)
-		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< 1,  0 >()];
-		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< -1,  0 >()];
 	   if(coordinates.y() == mesh.getDimensions().y() - 1)
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  1 >()];
 	   if(coordinates.y() == 0)
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  1 >()];
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  -1 >()];
 	   if(signui > 0.0)
@@ -286,7 +286,7 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real* u,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const Real& time,
           	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 const IndexType boundaryCondition,
-          	  	  	  	  	  	  	  	  	  	                     const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities,
+          	  	  	  	  	  	  	  	  	  	                     const tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities,
           	  	  	  	  	  	  	  	  	  	                     const Real* map) const
 //	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
@@ -316,24 +316,24 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real,
 	   if(coordinates.x() == mesh.getDimensions().x() - 1)
-		   xf += u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< -1,  0 >()];
-		   xf += u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xf += u[neighborEntities.template getEntityIndex< 1,  0 >()];
 	   if(coordinates.x() == 0)
-		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< 1,  0 >()];
-		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		   xb -= u[neighborEntities.template getEntityIndex< -1,  0 >()];
 	   if(coordinates.y() == mesh.getDimensions().y() - 1)
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		   yf += u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yf += u[neighborEntities.template getEntityIndex< 0,  1 >()];
 	   if(coordinates.y() == 0)
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  1 >()];
-		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		   yb -= u[neighborEntities.template getEntityIndex< 0,  -1 >()];
 	   if(signui > 0.0)
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov/CMakeLists.txt
old mode 100755
new mode 100644
index 74f7510d72329f60305bb366604e72b7201a3ea0..98b3eefe79e290a43e25867cf4e2c7cad9a1a419
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov/CMakeLists.txt
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/Godunov/CMakeLists.txt
@@ -14,4 +14,4 @@ set( tnl_implementation_operators_godunov_SOURCES
-INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/operators/godunov )
+INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/TNL/Operators/Godunov/ )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/upwindEikonal.h b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/upwindEikonal.h
index e8f793260999eba3f7e2082a5237ee3daba6e86a..7467f66715e6dcc5fcf9a04ac0ecd66d1f026a11 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/upwindEikonal.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Operators/Hamilton-Jacobi/upwindEikonal.h
@@ -41,7 +41,7 @@ class upwindEikonalScheme< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 1, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -54,11 +54,11 @@ class upwindEikonalScheme< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index
                        const RealType& f ) const
          const RealType& hx_inv = entity.getMesh().template getSpaceStepsProducts< -1 >();
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const RealType& u_c = u[ entity.getIndex() ];
-         const RealType& u_e = u[ neighbourEntities.template getEntityIndex< 1 >() ];
-         const RealType& u_w = u[ neighbourEntities.template getEntityIndex< -1 >() ];
+         const RealType& u_e = u[ neighborEntities.template getEntityIndex< 1 >() ];
+         const RealType& u_w = u[ neighborEntities.template getEntityIndex< -1 >() ];
          if( f > 0.0 )
@@ -94,7 +94,7 @@ class upwindEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 2, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
       static String getType();
@@ -108,12 +108,12 @@ class upwindEikonalScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index
          const RealType& hx_inv = entity.getMesh().template getSpaceStepsProducts< -1,  0 >();
          const RealType& hy_inv = entity.getMesh().template getSpaceStepsProducts<  0, -1 >();
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();   
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();   
          const RealType& u_c = u[ entity.getIndex() ];
-         const RealType& u_e = u[ neighbourEntities.template getEntityIndex<  1,  0 >() ];
-         const RealType& u_w = u[ neighbourEntities.template getEntityIndex< -1,  0 >() ];
-         const RealType& u_n = u[ neighbourEntities.template getEntityIndex<  0,  1 >() ];
-         const RealType& u_s = u[ neighbourEntities.template getEntityIndex<  0, -1 >() ];
+         const RealType& u_e = u[ neighborEntities.template getEntityIndex<  1,  0 >() ];
+         const RealType& u_w = u[ neighborEntities.template getEntityIndex< -1,  0 >() ];
+         const RealType& u_n = u[ neighborEntities.template getEntityIndex<  0,  1 >() ];
+         const RealType& u_s = u[ neighborEntities.template getEntityIndex<  0, -1 >() ];
          if( f > 0.0 )
             const RealType xf = negativePart( ( u_e - u_c ) * hx_inv );
@@ -151,7 +151,7 @@ class upwindEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index
       typedef Device DeviceType;
       typedef Index IndexType;
       typedef tnlGrid< 3, Real, Device, Index > MeshType;
-      typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -167,14 +167,14 @@ class upwindEikonalScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index
          const RealType& hy_inv = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >();
          const RealType& hz_inv = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >();
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const RealType& u_c = u[ entity.getIndex() ];
-         const RealType& u_e = u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ];
-         const RealType& u_w = u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ];
-         const RealType& u_n = u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ];
-         const RealType& u_s = u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ];
-         const RealType& u_t = u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ];
-         const RealType& u_b = u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ];
+         const RealType& u_e = u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ];
+         const RealType& u_w = u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ];
+         const RealType& u_n = u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ];
+         const RealType& u_s = u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ];
+         const RealType& u_t = u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ];
+         const RealType& u_b = u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ];
          if( f > 0.0 )
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.h
index d17b47ac5125bebf9ac9619a9496b5dc4b46b394..813e99c72cd6de9b597c10a943ab666220453018 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/main.h
@@ -47,26 +47,26 @@ int main( int argc, char* argv[] )
    if(dim == 2)
-		tnlFastSweepingMap<tnlGrid<2,double,tnlHost, int>, double, int> solver;
+		tnlFastSweepingMap<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver;
 			cerr << "Solver failed to initialize." << endl;
 			return EXIT_FAILURE;
-		checkCudaDevice;
 	   cout << "-------------------------------------------------------------" << endl;
 	   cout << "Starting solver..." << endl;
 //   else if(dim == 3)
 //   {
-//		tnlFastSweepingMap<tnlGrid<3,double,tnlHost, int>, double, int> solver;
+//		tnlFastSweepingMap<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver;
 //		if(!solver.init(parameters))
 //	   {
 //			cerr << "Solver failed to initialize." << endl;
 //			return EXIT_FAILURE;
 //	   }
-//		checkCudaDevice;
 //	   cout << "-------------------------------------------------------------" << endl;
 //	   cout << "Starting solver..." << endl;
 //	   solver.run();
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap.h
index f6f2150a4df3ae6f0fe85efeeb6ea9abac0007d3..c568329ba2aa5fdb8fed303d43b25e73f210c014 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap.h
@@ -17,10 +17,10 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
 #include <functions/tnlMeshFunction.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
 #include <limits.h>
@@ -55,7 +55,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -137,7 +137,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
index 5e9d11b5264e301d8eb9a30b625861bf6a79ad2b..b865bef001413f10d2862e0c16675e5c33309470 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
@@ -135,7 +135,7 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	checkCudaDevice;
 	return true;
@@ -164,21 +164,21 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 		cudaMemcpy(this->changed, &zero, sizeof(int), cudaMemcpyHostToDevice);
-		checkCudaDevice;
 		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0, this->changed);
-		checkCudaDevice;
 		cudaMemcpy(&run, this->changed,sizeof(int), cudaMemcpyDeviceToHost);
-		checkCudaDevice;
 		cout << "Finished set of sweeps #" << cntr << "           " << run << endl;
-	checkCudaDevice;
 	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -207,35 +207,35 @@ template< typename MeshReal,
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index* something_changed)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
 	if(map_cuda[Entity.getIndex()] != 0.0)
-		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 		Real value = cudaDofVector2[Entity.getIndex()];
 		Real im = abs(1.0/map_cuda[Entity.getIndex()]);
 		Real a,b, tmp;
 		if( i == 0 )
-			a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+			a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 		else if( i == Mesh.getDimensions().x() - 1 )
-			a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+			a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-			a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-					 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+			a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+					 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 		if( j == 0 )
-			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 		else if( j == Mesh.getDimensions().y() - 1 )
-			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-			b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-					 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+			b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+					 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -269,11 +269,11 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 	int i = threadIdx.x + blockDim.x*blockIdx.x;
 	int j = blockDim.y*blockIdx.y + threadIdx.y;
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	int gid = Entity.getIndex();
@@ -294,18 +294,18 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //	{
 //		if(cudaDofVector[Entity.getIndex()] > 0)
 //		{
-//			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1111(i,j);
 //					else
 //						setupSquare1110(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1101(i,j);
 //					else
 //						setupSquare1100(i,j);
@@ -313,16 +313,16 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //			}
 //			else
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1011(i,j);
 //					else
 //						setupSquare1010(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare1001(i,j);
 //					else
 //						setupSquare1000(i,j);
@@ -331,18 +331,18 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //		}
 //		else
 //		{
-//			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0111(i,j);
 //					else
 //						setupSquare0110(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0101(i,j);
 //					else
 //						setupSquare0100(i,j);
@@ -350,16 +350,16 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //			}
 //			else
 //			{
-//				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0011(i,j);
 //					else
 //						setupSquare0010(i,j);
 //				}
 //				else
 //				{
-//					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //						setupSquare0001(i,j);
 //					else
 //						setupSquare0000(i,j);
@@ -397,7 +397,7 @@ Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i, int* changed)
+__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed)
 	__shared__ int something_changed;
@@ -512,7 +512,7 @@ __global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, d
-__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
@@ -571,14 +571,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -590,14 +590,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -609,18 +609,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -629,9 +629,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -642,18 +642,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -662,9 +662,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -675,18 +675,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -695,9 +695,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -708,17 +708,17 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -728,9 +728,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -742,18 +742,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -762,9 +762,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -775,18 +775,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -795,9 +795,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -808,18 +808,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -828,9 +828,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -841,17 +841,17 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -861,9 +861,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -878,18 +878,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -898,9 +898,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -911,18 +911,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -931,9 +931,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -944,14 +944,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -968,18 +968,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -988,9 +988,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1001,18 +1001,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -1021,9 +1021,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -1034,14 +1034,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
index adff615fd43cd012ba5e45095140d7c2e0e5fbf0..5f4d6f86c6f2db94e7ed8ac728cb5334216c32a8 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
@@ -103,7 +103,7 @@ template< typename MeshReal,
 bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
@@ -122,22 +122,22 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //			{
 //			this->Entity.setCoordinates(CoordinatesType(i,j));
 //			this->Entity.refresh();
-//			neighbourEntities.refresh(Mesh,Entity.getIndex());
+//			neighborEntities.refresh(Mesh,Entity.getIndex());
 //				if(dofVector[this->Entity.getIndex()] > 0)
 //				{
-//					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //					{
-//						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare1111(i,j);
 //							else
 //								setupSquare1110(i,j);
 //						}
 //						else
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare1101(i,j);
 //							else
 //								setupSquare1100(i,j);
@@ -145,16 +145,16 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //					}
 //					else
 //					{
-//						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare1011(i,j);
 //							else
 //								setupSquare1010(i,j);
 //						}
 //						else
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare1001(i,j);
 //							else
 //								setupSquare1000(i,j);
@@ -163,18 +163,18 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //				}
 //				else
 //				{
-//					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+//					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
 //					{
-//						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare0111(i,j);
 //							else
 //								setupSquare0110(i,j);
 //						}
 //						else
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare0101(i,j);
 //							else
 //								setupSquare0100(i,j);
@@ -182,16 +182,16 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //					}
 //					else
 //					{
-//						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare0011(i,j);
 //							else
 //								setupSquare0010(i,j);
 //						}
 //						else
 //						{
-//							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
 //								setupSquare0001(i,j);
 //							else
 //								setupSquare0000(i,j);
@@ -287,30 +287,30 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 	if(map[Entity.getIndex()] != 0.0)
-		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 		Real value = dofVector2[Entity.getIndex()];
 		Real im = abs(1.0/map[Entity.getIndex()]);
 		Real a,b, tmp;
 		if( i == 0 )
-			a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+			a = dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 		else if( i == Mesh.getDimensions().x() - 1 )
-			a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+			a = dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-			a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-					 dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+			a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+					 dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 		if( j == 0 )
-			b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+			b = dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 		else if( j == Mesh.getDimensions().y() - 1 )
-			b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+			b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-			b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-					 dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+			b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+					 dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -362,11 +362,11 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //	this->Entity.setCoordinates(CoordinatesType(i,j));
 //	this->Entity.refresh();
-//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	auto neighborEntities =  Entity.getNeighborEntities();
 //	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -380,11 +380,11 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
 //	this->Entity.setCoordinates(CoordinatesType(i,j));
 //	this->Entity.refresh();
-//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	auto neighborEntities =  Entity.getNeighborEntities();
 //	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -398,15 +398,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -415,9 +415,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -430,15 +430,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -447,9 +447,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -462,15 +462,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -479,9 +479,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -494,14 +494,14 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -511,9 +511,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -527,15 +527,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -544,9 +544,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -559,15 +559,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -576,9 +576,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -591,15 +591,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -608,9 +608,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -623,14 +623,14 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -640,9 +640,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -659,15 +659,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -676,9 +676,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -691,15 +691,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -708,9 +708,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -723,11 +723,11 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -746,15 +746,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -763,9 +763,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -778,15 +778,15 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -795,9 +795,9 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -810,11 +810,11 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap_CUDA.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
index aa606ea47999ce739fb23d7ad6c38f937cf23f26..a23057e78c745e74467db4c4190d6f217024bc5a 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
@@ -17,9 +17,9 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
@@ -54,7 +54,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -131,7 +131,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -176,11 +176,11 @@ protected:
 #ifdef HAVE_CUDA
 //template<int sweep_t>
-__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i, int* changed);
-//__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i);
+__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed);
+//__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
-//__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 3,double, tnlHost, int >, double, int >* solver);
+__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
+//__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver);
 /*various implementtions.... choose one*/
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.h
index 217377f3b77645f2ad2ede8db68bdada5f0885fb..8aca8f1b81db3a4a462d231e4724f3eef50e4723 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/main.h
@@ -47,26 +47,26 @@ int main( int argc, char* argv[] )
    if(dim == 2)
-		tnlFastSweeping<tnlGrid<2,double,tnlHost, int>, double, int> solver;
+		tnlFastSweeping<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver;
 			cerr << "Solver failed to initialize." << endl;
 			return EXIT_FAILURE;
-		checkCudaDevice;
 	   cout << "-------------------------------------------------------------" << endl;
 	   cout << "Starting solver..." << endl;
    else if(dim == 3)
-		tnlFastSweeping<tnlGrid<3,double,tnlHost, int>, double, int> solver;
+		tnlFastSweeping<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver;
 			cerr << "Solver failed to initialize." << endl;
 			return EXIT_FAILURE;
-		checkCudaDevice;
 	   cout << "-------------------------------------------------------------" << endl;
 	   cout << "Starting solver..." << endl;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping.h
index 55f145af95deba9752fb5c4f564bbbfa331bf153..96d26db7b5a2077d8e2199292f0e888b0171a5c2 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping.h
@@ -17,10 +17,10 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
 #include <functions/tnlMeshFunction.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
 #include <limits.h>
@@ -55,7 +55,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -135,7 +135,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h
index 48cbba5cb2e247296d41d3b7c2f935c8f75ce960..21e45020924453bd5676650028f6fdc6b210bc42 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h
@@ -89,7 +89,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	return true;
@@ -158,21 +158,21 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	for(int i = 0; i < 2*n ; i++)
-	checkCudaDevice;
 	for(int i = 0; i < 2*n ; i++)
-	checkCudaDevice;
 	for(int i = 2*n - 1; i > -1; i--)
@@ -180,7 +180,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -439,7 +439,7 @@ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
@@ -502,7 +502,7 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, doub
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
 	int gy = blockDim.y*blockIdx.y + threadIdx.y;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h
index e6b20eb4153b04248a3bf72b2198d923aa21d748..1d4ee11b0856a618f7b86452828a2bd2e71bf24e 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h
@@ -89,7 +89,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	return true;
@@ -158,21 +158,21 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 ////	for(int i = 0; i < 2*n ; i++)
 //	{
 //		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,0);
 //		cudaDeviceSynchronize();
 //	}
 //	cudaDeviceSynchronize();
-//	checkCudaDevice;
 ////	for(int i = 0; i < 2*n ; i++)
 //	{
 //		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,0);
 //		cudaDeviceSynchronize();
 //	}
 //	cudaDeviceSynchronize();
-//	checkCudaDevice;
 ////	for(int i = 2*n - 1; i > -1; i--)
 //	{
 //		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,0);
@@ -180,7 +180,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 //	}
 //	cudaDeviceSynchronize();
-//	checkCudaDevice;
 	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -438,7 +438,7 @@ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 	//int gx = threadIdx.x;
@@ -568,7 +568,7 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, doub
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
 	int gy = blockDim.y*blockIdx.y + threadIdx.y;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h
index d05b649f29d222bc96649d3d6ed92a7fa11cf14e..fad3bc293cc452c1fe077e892b13b51750a78bb6 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h
@@ -104,7 +104,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	return true;
@@ -174,21 +174,21 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 //	cudaDeviceSynchronize();
-//	checkCudaDevice;
 //	for(int i = 0; i < 2*m -1; i++)
 //	{
 //		runCUDA<2><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i);
 //		cudaDeviceSynchronize();
 //	}
 //	cudaDeviceSynchronize();
-//	checkCudaDevice;
 //	for(int i = 0; i < 2*m -1; i++)
 //	{
 //		runCUDA<4><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i);
 //		cudaDeviceSynchronize();
 //	}
 //	cudaDeviceSynchronize();
-//	checkCudaDevice;
 //	for(int i = 0; i < 2*m -1; i++)
 //	{
 //		runCUDA<8><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,8,i);
@@ -234,7 +234,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 //		cudaDeviceSynchronize();
 //	}
-	checkCudaDevice;
 	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -492,7 +492,7 @@ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-__global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+__global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 	if(blockIdx.x+blockIdx.y == k)
@@ -516,7 +516,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
-	__global__ void runCUDA<2>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+	__global__ void runCUDA<2>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 	if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k)
@@ -538,7 +538,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
 	}			/*---------------------------------------------------------------------------------------------------------------------------*/
-	__global__ void runCUDA<4>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+	__global__ void runCUDA<4>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 	if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2)
@@ -564,7 +564,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
-	__global__ void runCUDA<8>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+	__global__ void runCUDA<8>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 	if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2)
@@ -595,7 +595,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
-		__global__ void runCUDA<5>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+		__global__ void runCUDA<5>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 			if(blockIdx.x+blockIdx.y == k)
@@ -640,7 +640,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
-		__global__ void runCUDA<10>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+		__global__ void runCUDA<10>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 			if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k)
@@ -687,7 +687,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
-	__global__ void runCUDA<15>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+	__global__ void runCUDA<15>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 		if(blockIdx.x+blockIdx.y == k)
@@ -806,7 +806,7 @@ __global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, d
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
 	int gy = blockDim.y*blockIdx.y + threadIdx.y;
@@ -825,7 +825,7 @@ __global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, dou
-//__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int k)
+//__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
 //	if(sweep==1 && blockIdx.x+blockIdx.y == k)
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h
index 230b6ce436bb53c40172eb43cf8b7e668f4bf891..dd878d4953d46ac014622a43e03ad0e8b6b9a492 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h
@@ -122,7 +122,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	return true;
@@ -147,7 +147,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -176,32 +176,32 @@ template< typename MeshReal,
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real value = cudaDofVector2[Entity.getIndex()];
 	Real a,b, tmp;
 	if( i == 0 )
-		a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-		a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+		a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 	if( j == 0 )
-		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+		b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -227,11 +227,11 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	int i = threadIdx.x + blockDim.x*blockIdx.x;
 	int j = blockDim.y*blockIdx.y + threadIdx.y;
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	int gid = Entity.getIndex();
@@ -248,18 +248,18 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 		if(cudaDofVector[Entity.getIndex()] > 0)
-			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -267,16 +267,16 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -285,18 +285,18 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -304,16 +304,16 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -357,7 +357,7 @@ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
 	int gx = 0;
@@ -464,7 +464,7 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, doub
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
@@ -523,14 +523,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-//	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+//	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
 //	Entity.setCoordinates(CoordinatesType(i,j));
 //	Entity.refresh();
-//	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+//	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 //	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
-//	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -542,14 +542,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-//	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+//	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
 //	Entity.setCoordinates(CoordinatesType(i,j));
 //	Entity.refresh();
-//	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+//	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 //	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
-//	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -561,18 +561,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -581,9 +581,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -594,18 +594,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -614,9 +614,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -627,18 +627,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -647,9 +647,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=INT_MAX;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=INT_MAX;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -660,17 +660,17 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -680,9 +680,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -694,18 +694,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -714,9 +714,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=-INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -727,18 +727,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -747,9 +747,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -760,18 +760,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -780,9 +780,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -793,17 +793,17 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -813,9 +813,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -830,18 +830,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -850,9 +850,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -863,18 +863,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -883,9 +883,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -896,14 +896,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -920,18 +920,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -940,9 +940,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -953,18 +953,18 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real al,be, a,b,c,s;
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
 	b = 1.0;
@@ -973,9 +973,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -986,14 +986,14 @@ template< typename MeshReal,
           typename Index >
 void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h
index f9f3e28f63fc7a3ad88b98980a4454035fd299b4..4f8c90df818b7121663e8053b0577d37fd119055 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h
@@ -112,7 +112,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	return true;
@@ -179,7 +179,7 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -491,7 +491,7 @@ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
 	extern __shared__ double u[];
@@ -677,7 +677,7 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, doub
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
 	int gy = blockDim.y*blockIdx.y + threadIdx.y;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_impl.h
index 920806c211483d1c8cc3dabd2e194704dbb560bd..8fd78cea97230d16135ddd03903442bb9922e0b8 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping2D_impl.h
@@ -93,7 +93,7 @@ template< typename MeshReal,
 bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
@@ -105,22 +105,22 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-			neighbourEntities.refresh(Mesh,Entity.getIndex());
+			neighborEntities.refresh(Mesh,Entity.getIndex());
 				if(dofVector[this->Entity.getIndex()] > 0)
-					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -128,16 +128,16 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -146,18 +146,18 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -165,16 +165,16 @@ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
@@ -397,29 +397,29 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
 	Real value = dofVector2[Entity.getIndex()];
 	Real a,b, tmp;
 	if( i == 0 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
+				 dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
 	if( j == 0 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
+				 dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
@@ -466,11 +466,11 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 //	this->Entity.setCoordinates(CoordinatesType(i,j));
 //	this->Entity.refresh();
-//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	auto neighborEntities =  Entity.getNeighborEntities();
 //	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -484,11 +484,11 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
 //	this->Entity.setCoordinates(CoordinatesType(i,j));
 //	this->Entity.refresh();
-//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	auto neighborEntities =  Entity.getNeighborEntities();
 //	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -502,15 +502,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -519,9 +519,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -534,15 +534,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -551,9 +551,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -566,15 +566,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -583,9 +583,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -598,14 +598,14 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -615,9 +615,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -631,15 +631,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
 	a = be/al;
@@ -648,9 +648,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -663,15 +663,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = be/al;
@@ -680,9 +680,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -695,15 +695,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = be/al;
@@ -712,9 +712,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -727,14 +727,14 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
 	a = be/al;
@@ -744,9 +744,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -763,15 +763,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -780,9 +780,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -795,15 +795,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -812,9 +812,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -827,11 +827,11 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -850,15 +850,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
 	a = al-be;
@@ -867,9 +867,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -882,15 +882,15 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
 	Real al,be, a,b,c,s;
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
 	a = al-be;
@@ -899,9 +899,9 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
@@ -914,11 +914,11 @@ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > ::
-	auto neighbourEntities =  Entity.getNeighbourEntities();
+	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h
index da045676ae1f119e73cc4539732b31d5d632ca23..51bb61716d1b49e2921d974e93692cdf0a34ddf1 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h
@@ -111,10 +111,10 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > ::
 	dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1);
-	checkCudaDevice;
-	checkCudaDevice;
 	return true;
@@ -139,7 +139,7 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > ::
-	checkCudaDevice;
 	cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
@@ -165,41 +165,41 @@ template< typename MeshReal,
 void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
-	tnlGridEntity< tnlGrid< 3,double, tnlHost, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
 	Real value = cudaDofVector2[Entity.getIndex()];
 	Real a,b,c, tmp;
 	if( i == 0 )
-		a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+		a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+		a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-		a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0 >()] );
+		a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0,  0 >()] );
 	if( j == 0 )
-		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
-		b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0 >()] );
+		b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1,  0 >()] );
 	if( k == 0 )
-		c = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+		c = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
 	else if( k == Mesh.getDimensions().z() - 1 )
-		c = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+		c = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
-		c = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()],
-				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1 >()] );
+		c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1 >()],
+				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  1 >()] );
 	Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c);
@@ -222,7 +222,7 @@ template< typename MeshReal,
 bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k)
-	tnlGridEntity< tnlGrid< 3,double, tnlHost, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
 	int gid = Entity.getIndex();
@@ -255,7 +255,7 @@ Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > ::
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
 	int gx = 0;
@@ -474,7 +474,7 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, doub
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, double, int >* solver)
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver)
 	int gx = threadIdx.x + blockDim.x*blockIdx.x;
 	int gy = blockDim.y*blockIdx.y + threadIdx.y;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_impl.h
index dd5681e6d88b3fa666f210d0fe24f2423ac78b68..dc4fd4f866c19b09d17ad8b2c19d2907d581d1c8 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping3D_impl.h
@@ -237,38 +237,38 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > ::
-	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
 	Real value = dofVector2[Entity.getIndex()];
 	Real a,b,c, tmp;
 	if( i == 0 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0>()];
+		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()];
 	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0>()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0>()] );
+		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0>()],
+				 dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()] );
 	if( j == 0 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0>()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()];
 	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0>()];
+		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()];
-		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0>()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0>()] );
+		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()],
+				 dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()] );
 	if( k == 0 )
-		c = dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1>()];
+		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()];
 	else if( k == Mesh.getDimensions().z() - 1 )
-		c = dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1>()];
+		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()];
-		c = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1>()],
-				 dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1>()] );
+		c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()],
+				 dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()] );
 	Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping_CUDA.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping_CUDA.h
index 310cdf3f3028eb68e71c31d821ffea1538615724..f531da431bfec5d16da8ea7deabe6595031a0873 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping_CUDA.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/fast-sweeping/tnlFastSweeping_CUDA.h
@@ -17,9 +17,9 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
@@ -54,7 +54,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -129,7 +129,7 @@ public:
 	typedef Device DeviceType;
 	typedef Index IndexType;
 	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
 	typedef typename MeshType::CoordinatesType CoordinatesType;
@@ -174,11 +174,11 @@ protected:
 #ifdef HAVE_CUDA
 //template<int sweep_t>
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i);
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i);
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
+__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, double, int >* solver);
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
+__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver);
 /*various implementtions.... choose one*/
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.h
index 55a0942f81721c1e90e670aaae18941471f6e13d..911f0a29d9f0a82aa73767cfcee407093bc2eb66 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/main.h
@@ -41,24 +41,24 @@ int main( int argc, char* argv[] )
 	tnlDeviceEnum device;
-	device = tnlHostDevice;
+	device = TNL::Devices::HostDevice;
 	const int& dim = parameters.getParameter< int >( "dim" );
 	if(dim == 2)
-	   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeHost;
+	   typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost;
 /*#ifdef HAVE_CUDA
 		   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
 #ifndef HAVE_CUDA*/
-	   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeDevice;
+	   typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice;
-	   if(device==tnlHostDevice)
+	   if(device==TNL::Devices::HostDevice)
-		   typedef tnlHost Device;
+		   typedef TNL::Devices::Host Device;
 		   tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
index bf0a388166c89946727b95687eca1a74764739c3..400e163c9dcc8d536a478a0952aabf8ccbb1a2d8 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
@@ -18,10 +18,10 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
 #include <functions/tnlMeshFunction.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
 #include <limits.h>
@@ -31,7 +31,6 @@
 #include <ctime>
 #ifdef HAVE_CUDA
-#include <cuda.h>
 #include <core/tnlCuda.h>
@@ -53,13 +52,13 @@ public:
 	typedef SchemeDevice SchemeTypeDevice;
 	typedef SchemeHost SchemeTypeHost;
 	typedef Device DeviceType;
-	typedef tnlVector< double, tnlHost, int > VectorType;
-	typedef tnlVector< int, tnlHost, int > IntVectorType;
-	typedef tnlGrid< 2, double, tnlHost, int > MeshType;
+	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType;
+	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType;
+	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType;
 #ifdef HAVE_CUDA
-	typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
-	typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
-	typedef tnlGrid< 2, double, tnlHost, int > MeshTypeCUDA;
+	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA;
+	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA;
+	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA;
 	bool init( const Config::ParameterContainer& parameters );
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
index 1cb730e42d0b0b72782e61e011b6081d2eddc6ef..2925df8b236f13b0d3c460b5ca9b2970ff730e80 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
@@ -31,7 +31,7 @@
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver()
-	this->device = tnlHostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice   ---    vypocet na CPU
+	this->device = TNL::Devices::HostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice   ---    vypocet na CPU
 #ifdef HAVE_CUDA
 	if(this->device == tnlCudaDevice)
@@ -123,7 +123,7 @@ bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init
 		cudaMalloc(&(this->tmp_map), this->map_stretched.getSize()*sizeof(double));
 		cudaMalloc(&(this->runcuda), sizeof(int));
-		checkCudaDevice;
 		int* tmpUC;
 		cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
@@ -131,19 +131,19 @@ bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init
 		initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC, tmp_map);
-		checkCudaDevice;
 		double* tmpu = NULL;
 		cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
 		cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
 		cudaMemcpy((this->tmp_map), this->map_stretched.getData(), this->map_stretched.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-		checkCudaDevice;
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 		VectorType tmp_map;
 		tmp_map.setSize(this->n * this->n);
@@ -187,21 +187,21 @@ bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init
 	else if(this->device == tnlCudaDevice)
-		checkCudaDevice;
 		dim3 threadsPerBlock(this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows);
-		checkCudaDevice;
 		initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-		checkCudaDevice;
 	this->currentStep = 1;
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 #ifdef HAVE_CUDA
 	else if(this->device == tnlCudaDevice)
@@ -211,10 +211,10 @@ bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init
 		synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-		checkCudaDevice;
 		synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-		checkCudaDevice;
@@ -226,7 +226,7 @@ bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run()
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 		while ((this->boundaryConditions.max() > 0 )/* || !end*/)
@@ -360,12 +360,12 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run(
 		dim3 threadsPerBlock(this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows);
-		checkCudaDevice;
 		bool* tmpb;
 		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
-		checkCudaDevice;
 		int i = 1;
 		time_diff = 0.0;
@@ -377,7 +377,7 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run(
 				end_cuda = false;
-			checkCudaDevice;
 			start = std::clock();
 			runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
@@ -386,10 +386,10 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run(
 			//start = std::clock();
 			synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-			checkCudaDevice;
 			synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-			checkCudaDevice;
 			//time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
 			cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost);
@@ -738,7 +738,7 @@ tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgri
    double maxResidue( 1.0 );
    tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
    for( int i = 0; i < u.getSize(); i ++ )
@@ -758,9 +758,9 @@ tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgri
 			Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()));
-			neighbourEntities.refresh(subMesh,Entity.getIndex());
+			neighborEntities.refresh(subMesh,Entity.getIndex());
 			if(map[i] != 0.0)
-				fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighbourEntities,map);
+				fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities,map);
       maxResidue = fu. absMax();
@@ -909,10 +909,10 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS
    tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-   neighbourEntities.refresh(subMesh,Entity.getIndex());
+   neighborEntities.refresh(subMesh,Entity.getIndex());
 	if(map_local[l] == 0.0)
@@ -929,7 +929,7 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS
-		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j), u, time, boundaryCondition, neighbourEntities, map_local);
+		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j), u, time, boundaryCondition, neighborEntities, map_local);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.h
index 3a976dae469a77dc33559cf562f722913de0ef28..178e816aff7c1fe2c46b16529b1b263b24796879 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/main.h
@@ -42,24 +42,24 @@ int main( int argc, char* argv[] )
    //if (parameters.GetParameter <String>("scheme") == "godunov")
    tnlDeviceEnum device;
-   device = tnlHostDevice;
+   device = TNL::Devices::HostDevice;
    const int& dim = parameters.getParameter< int >( "dim" );
   if(dim == 2)
-	   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeHost;
+	   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost;
 		/*#ifdef HAVE_CUDA
 		   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
 		#ifndef HAVE_CUDA*/
-	   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeDevice;
+	   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice;
-	   if(device==tnlHostDevice)
+	   if(device==TNL::Devices::HostDevice)
-		   typedef tnlHost Device;
+		   typedef TNL::Devices::Host Device;
 		   tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
@@ -92,17 +92,17 @@ int main( int argc, char* argv[] )
   else if(dim == 3)
-	   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,tnlHost, int>, double, int > SchemeTypeHost;
+	   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost;
 		/*#ifdef HAVE_CUDA
 		   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
 		#ifndef HAVE_CUDA*/
-	   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,tnlHost, int>, double, int > SchemeTypeDevice;
+	   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice;
-	   if(device==tnlHostDevice)
+	   if(device==TNL::Devices::HostDevice)
-		   typedef tnlHost Device;
+		   typedef TNL::Devices::Host Device;
 		   tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
index f79752b7187cf9de4f58299fa9add9ce72f2c407..19cdd949359d4349172af820def49169146c8717 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
@@ -18,10 +18,10 @@
 #include <TNL/Config/ParameterContainer.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <TNL/Containers/StaticVector.h>
 #include <functions/tnlMeshFunction.h>
-#include <core/tnlHost.h>
+#include <TNL/Devices/Host.h>
 #include <mesh/tnlGrid.h>
 #include <mesh/grids/tnlGridEntity.h>
 #include <limits.h>
@@ -32,7 +32,6 @@
 #include <ctime>
 #ifdef HAVE_CUDA
-#include <cuda.h>
 #include <core/tnlCuda.h>
@@ -54,13 +53,13 @@ public:
 	typedef SchemeDevice SchemeTypeDevice;
 	typedef SchemeHost SchemeTypeHost;
 	typedef Device DeviceType;
-	typedef tnlVector< double, tnlHost, int > VectorType;
-	typedef tnlVector< int, tnlHost, int > IntVectorType;
-	typedef tnlGrid< 2, double, tnlHost, int > MeshType;
+	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType;
+	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType;
+	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType;
 #ifdef HAVE_CUDA
-	typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
-	typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
-	typedef tnlGrid< 2, double, tnlHost, int > MeshTypeCUDA;
+	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA;
+	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA;
+	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA;
 	bool init( const Config::ParameterContainer& parameters );
@@ -161,7 +160,7 @@ public:
 	//__device__ bool initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-	/*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, tnlHost, int >* caller);*/
+	/*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, TNL::Devices::Host, int >* caller);*/
@@ -181,13 +180,13 @@ public:
 		typedef SchemeDevice SchemeTypeDevice;
 		typedef SchemeHost SchemeTypeHost;
 		typedef Device DeviceType;
-		typedef tnlVector< double, tnlHost, int > VectorType;
-		typedef tnlVector< int, tnlHost, int > IntVectorType;
-		typedef tnlGrid< 3, double, tnlHost, int > MeshType;
+		typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType;
+		typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType;
+		typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshType;
 	#ifdef HAVE_CUDA
-		typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
-		typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
-		typedef tnlGrid< 3, double, tnlHost, int > MeshTypeCUDA;
+		typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA;
+		typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA;
+		typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshTypeCUDA;
 		bool init( const Config::ParameterContainer& parameters );
@@ -285,7 +284,7 @@ public:
 	//__device__ bool initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-	/*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, tnlHost, int >* caller);*/
+	/*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, TNL::Devices::Host, int >* caller);*/
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
index 6279fbb805733172ebda8a78c68a128d0d33f92c..8370b069d5c96bfd7921390e46e4979131f15b36 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
@@ -25,7 +25,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device>
 tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
 	cout << "a" << endl;
-	this->device = tnlCudaDevice;  /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice   ---    vypocet na CPU
+	this->device = tnlCudaDevice;  /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice   ---    vypocet na CPU
 #ifdef HAVE_CUDA
 	if(this->device == tnlCudaDevice)
@@ -124,14 +124,14 @@ bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
 	cudaMalloc(&(this->runcuda), sizeof(int));
-	checkCudaDevice;
 	int* tmpUC;
 	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
 	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
 	initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC);
-	checkCudaDevice;
 	//cout << "s " << endl;
 	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
 	double* tmpu = NULL;
@@ -140,13 +140,13 @@ bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 	//printf("%p %p \n",tmpu,tmpw);
 	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-	checkCudaDevice;
 	//cout << "s "<< endl;
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 	for(int i = 0; i < this->subgridValues.getSize(); i++)
@@ -182,11 +182,11 @@ bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 //		cout << "pre 1 kernel" << endl;
-		checkCudaDevice;
 		dim3 threadsPerBlock(this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows);
-		checkCudaDevice;
 		initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
 //		cout << "post 1 kernel" << endl;
@@ -196,7 +196,7 @@ bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 	this->currentStep = 1;
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 #ifdef HAVE_CUDA
 	else if(this->device == tnlCudaDevice)
@@ -208,17 +208,17 @@ bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
 		//cout << this->tmpw << "   " <<  test[0] <<"   " << test[1] << "   " <<test[2] << "   " <<test[3] << endl;
-		checkCudaDevice;
 		synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-		checkCudaDevice;
 		synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-		checkCudaDevice;
 		//cout << test[0] << "   " <<test[1] <<"   " << test[2] << "   " <<test[3] << endl;
 		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//checkCudaDevice;
 		//cout << this->tmpw << "   " <<  test[0] << "   " <<test[1] << "   " <<test[2] <<"   " << test[3] << endl;
@@ -233,7 +233,7 @@ bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run()
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 	bool end = false;
@@ -343,17 +343,17 @@ void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 		dim3 threadsPerBlock(this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows);
-		checkCudaDevice;
 		//cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice);
 		//cout << "fn" << endl;
 		bool* tmpb;
 		//cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost);
-		//checkCudaDevice;
 		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
-		checkCudaDevice;
 		//cout << "fn" << endl;
 		int i = 1;
 		time_diff = 0.0;
@@ -366,7 +366,7 @@ void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 				end_cuda = false;
 			//cout << "a" << endl;
-			checkCudaDevice;
 			start = std::clock();
 			runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
 			//cout << "a" << endl;
@@ -376,10 +376,10 @@ void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 			//start = std::clock();
 			synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-			checkCudaDevice;
 			synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-			checkCudaDevice;
 			//time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
@@ -975,7 +975,7 @@ tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSu
    double maxResidue( 1.0 );
    //double lastResidue( 10000.0 );
    tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
    while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0 >()*/)
@@ -986,8 +986,8 @@ tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSu
 			Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()));
-			neighbourEntities.refresh(subMesh,Entity.getIndex());
-    	  fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighbourEntities);
+			neighborEntities.refresh(subMesh,Entity.getIndex());
+    	  fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities);
       maxResidue = fu. absMax();
@@ -1188,16 +1188,16 @@ void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::
 //   if( time + currentTau > finalTime ) currentTau = finalTime - time;
    tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-   neighbourEntities.refresh(subMesh,Entity.getIndex());
+   neighborEntities.refresh(subMesh,Entity.getIndex());
    while( time < finalTime )
-		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighbourEntities);
+		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighborEntities);
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
index 12d9003099643c923fdd6f4be18f2b07b35ebf3d..b0871824cbf67f7ef18d276903ec84d8eeb0a109 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
@@ -25,7 +25,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device>
 tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
 	cout << "a" << endl;
-	this->device = tnlHostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice   ---    vypocet na CPU
+	this->device = TNL::Devices::HostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice   ---    vypocet na CPU
 #ifdef HAVE_CUDA
 	if(this->device == tnlCudaDevice)
@@ -124,14 +124,14 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
 	cudaMalloc(&(this->runcuda), sizeof(int));
-	checkCudaDevice;
 	int* tmpUC;
 	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
 	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
 	initCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC);
-	checkCudaDevice;
 	//cout << "s " << endl;
 	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
 	double* tmpu = NULL;
@@ -140,13 +140,13 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 	//printf("%p %p \n",tmpu,tmpw);
 	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-	checkCudaDevice;
 	//cout << "s "<< endl;
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 #pragma omp parallel for num_threads(4) schedule(dynamic)
@@ -195,11 +195,11 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 //		cout << "pre 1 kernel" << endl;
-		checkCudaDevice;
 		dim3 threadsPerBlock(this->n, this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels);
-		checkCudaDevice;
 		initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
 //		cout << "post 1 kernel" << endl;
@@ -209,7 +209,7 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 	this->currentStep = 1;
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 #ifdef HAVE_CUDA
 	else if(this->device == tnlCudaDevice)
@@ -221,17 +221,17 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
 		//cout << this->tmpw << "   " <<  test[0] <<"   " << test[1] << "   " <<test[2] << "   " <<test[3] << endl;
-		checkCudaDevice;
 		synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
 		cout << cudaGetErrorString(cudaDeviceSynchronize()) << endl;
-		checkCudaDevice;
 		synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-		checkCudaDevice;
 		//cout << test[0] << "   " <<test[1] <<"   " << test[2] << "   " <<test[3] << endl;
 		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//checkCudaDevice;
 		//cout << this->tmpw << "   " <<  test[0] << "   " <<test[1] << "   " <<test[2] <<"   " << test[3] << endl;
@@ -246,7 +246,7 @@ bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::run()
-	if(this->device == tnlHostDevice)
+	if(this->device == TNL::Devices::HostDevice)
 	bool end = false;
@@ -381,17 +381,17 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 		dim3 threadsPerBlock(this->n, this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels);
-		checkCudaDevice;
 		//cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice);
 		//cout << "fn" << endl;
 		bool* tmpb;
 		//cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost);
-		//checkCudaDevice;
 		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
-		checkCudaDevice;
 		//cout << "fn" << endl;
 		int i = 1;
 		time_diff = 0.0;
@@ -404,7 +404,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 				end_cuda = false;
 			//cout << "a" << endl;
-			checkCudaDevice;
 			start = std::clock();
 			runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
 			//cout << "a" << endl;
@@ -414,10 +414,10 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 			//start = std::clock();
 			synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-			checkCudaDevice;
 			synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-			checkCudaDevice;
 			//time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
@@ -934,7 +934,7 @@ tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSu
    double maxResidue( 1.0 );
    //double lastResidue( 10000.0 );
    tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
    while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*/)
@@ -952,9 +952,9 @@ tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSu
 //			cout <<"c" << coords << endl;
 //			cout << "d" <<endl;
-			neighbourEntities.refresh(subMesh,Entity.getIndex());
+			neighborEntities.refresh(subMesh,Entity.getIndex());
 //			cout << "e" <<endl;
-    	  fu[ i ] = schemeHost.getValue( this->subMesh, i, coords,u, time, boundaryCondition, neighbourEntities );
+    	  fu[ i ] = schemeHost.getValue( this->subMesh, i, coords,u, time, boundaryCondition, neighborEntities );
 //    	  cout << "f" <<endl;
       maxResidue = fu. absMax();
@@ -1202,10 +1202,10 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
    if( boundaryCondition == 0 ) finalTime *= 2.0;
    tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
-   neighbourEntities.refresh(subMesh,Entity.getIndex());
+   neighborEntities.refresh(subMesh,Entity.getIndex());
    while( time < finalTime )
@@ -1214,7 +1214,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
-		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighbourEntities);
+		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighborEntities);
 		  if(abs(fu) > 0.0)
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/CMakeLists.txt b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/HamiltonJacobiProblem.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/HamiltonJacobiProblem.h
index a4a187cee17f9d8f8c43537347208d1caf10bc56..0523f7e90e1ccd0cef6683ffcff34b324305eee7 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/HamiltonJacobiProblem.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/HamiltonJacobiProblem.h
@@ -20,7 +20,7 @@
 #include <solvers/preconditioners/tnlDummyPreconditioner.h>
 #include <solvers/tnlSolverMonitor.h>
 #include <core/tnlLogger.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 #include <solvers/pde/tnlExplicitUpdater.h>
 #include <solvers/pde/tnlLinearSystemAssembler.h>
 #include <functions/tnlMeshFunction.h>
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
index c2983fac2c12e1b7791bd86973a712a5fcc6c948..b924f72331e7126f3d8accc7392e9e7554827890 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodsBase_impl.h
@@ -30,10 +30,10 @@ initInterface( const MeshFunctionType& input,
       const RealType& c = input( cell );      
       if( ! cell.isBoundaryEntity()  )
-         const auto& neighbours = cell.getNeighbourEntities();
+         const auto& neighbors = cell.getNeighborEntities();
          //const IndexType& c = cell.getIndex();
-         const IndexType e = neighbours.template getEntityIndex<  1 >();
-         const IndexType w = neighbours.template getEntityIndex< -1 >();
+         const IndexType e = neighbors.template getEntityIndex<  1 >();
+         const IndexType w = neighbors.template getEntityIndex< -1 >();
          if( c * input[ e ] <= 0 || c * input[ w ] <= 0 )
@@ -84,11 +84,11 @@ initInterface( const MeshFunctionType& input,
          const RealType& c = input( cell );
          if( ! cell.isBoundaryEntity()  )
-            auto neighbours = cell.getNeighbourEntities();
-            const IndexType e = neighbours.template getEntityIndex<  1,  0 >();
-            const IndexType w = neighbours.template getEntityIndex< -1,  0 >();
-            const IndexType n = neighbours.template getEntityIndex<  0,  1 >();
-            const IndexType s = neighbours.template getEntityIndex<  0, -1 >();            
+            auto neighbors = cell.getNeighborEntities();
+            const IndexType e = neighbors.template getEntityIndex<  1,  0 >();
+            const IndexType w = neighbors.template getEntityIndex< -1,  0 >();
+            const IndexType n = neighbors.template getEntityIndex<  0,  1 >();
+            const IndexType s = neighbors.template getEntityIndex<  0, -1 >();            
             if( c * input[ e ] <= 0 || c * input[ w ] <= 0 ||
                 c * input[ n ] <= 0 || c * input[ s ] <= 0 )
@@ -113,7 +113,7 @@ tnlDirectEikonalMethodsBase< tnlGrid< 2, Real, Device, Index > >::
 updateCell( MeshFunctionType& u,
             const MeshEntity& cell )
-   const auto& neighbourEntities = cell.template getNeighbourEntities< 2 >();
+   const auto& neighborEntities = cell.template getNeighborEntities< 2 >();
    const MeshType& mesh = cell.getMesh();
    const RealType& h = mesh.getSpaceSteps().x(); 
@@ -121,23 +121,23 @@ updateCell( MeshFunctionType& u,
    Real a, b, tmp;
    if( cell.getCoordinates().x() == 0 )
-      a = u[ neighbourEntities.template getEntityIndex< 1,  0 >() ];
+      a = u[ neighborEntities.template getEntityIndex< 1,  0 >() ];
    else if( cell.getCoordinates().x() == mesh.getDimensions().x() - 1 )
-      a = u[ neighbourEntities.template getEntityIndex< -1,  0 >() ];
+      a = u[ neighborEntities.template getEntityIndex< -1,  0 >() ];
-      a = ArgAbsMin( u[ neighbourEntities.template getEntityIndex< -1,  0 >() ],
-                     u[ neighbourEntities.template getEntityIndex<  1,  0 >() ] );
+      a = ArgAbsMin( u[ neighborEntities.template getEntityIndex< -1,  0 >() ],
+                     u[ neighborEntities.template getEntityIndex<  1,  0 >() ] );
    if( cell.getCoordinates().y() == 0 )
-      b = u[ neighbourEntities.template getEntityIndex< 0,  1 >()];
+      b = u[ neighborEntities.template getEntityIndex< 0,  1 >()];
    else if( cell.getCoordinates().y() == mesh.getDimensions().y() - 1 )
-      b = u[ neighbourEntities.template getEntityIndex< 0,  -1 >() ];
+      b = u[ neighborEntities.template getEntityIndex< 0,  -1 >() ];
-      b = ArgAbsMin( u[ neighbourEntities.template getEntityIndex< 0,  -1 >() ],
-                     u[ neighbourEntities.template getEntityIndex< 0,   1 >() ] );
+      b = ArgAbsMin( u[ neighborEntities.template getEntityIndex< 0,  -1 >() ],
+                     u[ neighborEntities.template getEntityIndex< 0,   1 >() ] );
    if( fabs( a ) == tnlTypeInfo< Real >::getMaxValue() && 
@@ -195,14 +195,14 @@ initInterface( const MeshFunctionType& input,
             const RealType& c = input( cell );
             if( ! cell.isBoundaryEntity() )
-               auto neighbours = cell.getNeighbourEntities();
+               auto neighbors = cell.getNeighborEntities();
                //const IndexType& c = cell.getIndex();
-               const IndexType e = neighbours.template getEntityIndex<  1,  0,  0 >();
-               const IndexType w = neighbours.template getEntityIndex< -1,  0,  0 >();
-               const IndexType n = neighbours.template getEntityIndex<  0,  1,  0 >();
-               const IndexType s = neighbours.template getEntityIndex<  0, -1,  0 >();
-               const IndexType t = neighbours.template getEntityIndex<  0,  0,  1 >();
-               const IndexType b = neighbours.template getEntityIndex<  0,  0, -1 >();
+               const IndexType e = neighbors.template getEntityIndex<  1,  0,  0 >();
+               const IndexType w = neighbors.template getEntityIndex< -1,  0,  0 >();
+               const IndexType n = neighbors.template getEntityIndex<  0,  1,  0 >();
+               const IndexType s = neighbors.template getEntityIndex<  0, -1,  0 >();
+               const IndexType t = neighbors.template getEntityIndex<  0,  0,  1 >();
+               const IndexType b = neighbors.template getEntityIndex<  0,  0, -1 >();
                if( c * input[ e ] <= 0 || c * input[ w ] <= 0 ||
                    c * input[ n ] <= 0 || c * input[ s ] <= 0 ||
diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h
index 671448e5bfb282d3f9b84a6c3c71e99e8c403432..59ddc2a690b5e6aecb62d5cf4c0997b7c567ff82 100644
--- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h
+++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod.h
@@ -30,13 +30,13 @@ template< typename Real,
 class tnlFastSweepingMethod< tnlGrid< 1, Real, Device, Index >, Anisotropy >
    : public tnlDirectEikonalMethodsBase< tnlGrid< 1, Real, Device, Index > >
-   static_assert(  std::is_same< Device, tnlHost >::value, "The fast sweeping method works only on CPU." );
+   static_assert(  std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." );
       typedef tnlGrid< 1, Real, Device, Index > MeshType;
       typedef Real RealType;
-      typedef tnlHost DeviceType;
+      typedef TNL::Devices::Host DeviceType;
       typedef Index IndexType;
       typedef Anisotropy AnisotropyType;
       typedef tnlDirectEikonalMethodsBase< tnlGrid< 1, Real, Device, Index > > BaseType;
@@ -67,13 +67,13 @@ template< typename Real,
 class tnlFastSweepingMethod< tnlGrid< 2, Real, Device, Index >, Anisotropy >
    : public tnlDirectEikonalMethodsBase< tnlGrid< 2, Real, Device, Index > >
-   static_assert(  std::is_same< Device, tnlHost >::value, "The fast sweeping method works only on CPU." );
+   static_assert(  std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." );
       typedef tnlGrid< 2, Real, Device, Index > MeshType;
       typedef Real RealType;
-      typedef tnlHost DeviceType;
+      typedef TNL::Devices::Host DeviceType;
       typedef Index IndexType;
       typedef Anisotropy AnisotropyType;
       typedef tnlDirectEikonalMethodsBase< tnlGrid< 2, Real, Device, Index > > BaseType;
@@ -104,13 +104,13 @@ template< typename Real,
 class tnlFastSweepingMethod< tnlGrid< 3, Real, Device, Index >, Anisotropy >
    : public tnlDirectEikonalMethodsBase< tnlGrid< 3, Real, Device, Index > >
-   static_assert(  std::is_same< Device, tnlHost >::value, "The fast sweeping method works only on CPU." );
+   static_assert(  std::is_same< Device, TNL::Devices::Host >::value, "The fast sweeping method works only on CPU." );
       typedef tnlGrid< 3, Real, Device, Index > MeshType;
       typedef Real RealType;
-      typedef tnlHost DeviceType;
+      typedef TNL::Devices::Host DeviceType;
       typedef Index IndexType;
       typedef Anisotropy AnisotropyType;
       typedef tnlDirectEikonalMethodsBase< tnlGrid< 3, Real, Device, Index > > BaseType;
diff --git a/src/TNL/File.cpp b/src/TNL/File.cpp
index d6e16b17ac89c0d0cd5c9b3f262f47b7eb2ad28c..885173cd40a320f67d149f5b1be6193012059600 100644
--- a/src/TNL/File.cpp
+++ b/src/TNL/File.cpp
@@ -15,7 +15,7 @@ namespace TNL {
 int File :: verbose = 0;
 File :: File()
-: mode( tnlUndefinedMode ),
+: mode( IOMode::undefined ),
   file( NULL ),
   fileOK( false ),
   writtenElements( 0 ),
@@ -23,26 +23,37 @@ File :: File()
+File :: ~File()
+   // destroying a file without closing is a memory leak
+   // (an open file descriptor is left behind, on Linux there is typically
+   // only a limited number of descriptors available to each process)
+   close();
 bool File :: open( const String& fileName,
-                      const tnlIOMode mode )
+                   const IOMode mode )
+   // close the existing file to avoid memory leaks
+   this->close();
    this->fileName = fileName;
    if( verbose )
       std::cout << "Opening file " << fileName;
-      if( mode == tnlReadMode )
+      if( mode == IOMode::read )
          std::cout << " for reading... " << std::endl;
          std::cout << " for writing ... " << std::endl;
-   if( mode == tnlReadMode )
-      file = fopen( fileName. getString(), "r" );
-   if( mode == tnlWriteMode )
-      file = fopen( fileName. getString(), "w" );
+   if( mode == IOMode::read )
+      file = std::fopen( fileName.getString(), "rb" );
+   if( mode == IOMode::write )
+      file = std::fopen( fileName.getString(), "wb" );
    if( file ==  NULL )
       std::cerr << "I am not able to open the file " << fileName << ". ";
-      perror( "" );
+      std::perror( "" );
       return false;
    this->fileOK = true;
@@ -55,24 +66,25 @@ bool File :: close()
    if( verbose )
       std::cout << "Closing the file " << getFileName() << " ... " << std::endl;
-   if( fclose( file ) != 0 )
+   if( file && std::fclose( file ) != 0 )
       std::cerr << "I was not able to close the file " << fileName << " properly!" << std::endl;
       return false;
+   // reset all attributes
+   mode = IOMode::undefined;
+   file = NULL;
+   fileOK = false;
+   fileName = "";
    readElements = writtenElements = 0;
    return true;
 bool fileExists( const String& fileName )
   std::fstream file;
-  file.open( fileName. getString(), std::ios::in );
-  bool result( true );
-  if( ! file )
-     result = false;
-  file.close();
-  return result;
+  file.open( fileName.getString(), std::ios::in );
+  return ! file.fail();
 } // namespace TNL
diff --git a/src/TNL/File.h b/src/TNL/File.h
index 1eea42eae61b67867bfeb7218b0a1801932f2c14..78861f5690a86901b170e0056830fe59250faf6e 100644
--- a/src/TNL/File.h
+++ b/src/TNL/File.h
@@ -12,22 +12,22 @@
 #include <iostream>
 #include <fstream>
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef HAVE_CUDA
-   #include <cuda_runtime.h>
+#include <cstdio>
 #include <TNL/Assert.h>
 #include <TNL/String.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/MIC.h>
 namespace TNL {
-enum tnlIOMode { tnlUndefinedMode = 0,
-                 tnlReadMode = 1,
-                 tnlWriteMode = 2 };
+enum class IOMode
+   undefined = 0,
+   read = 1,
+   write = 2
 /* When we need to transfer data between the GPU and the CPU we use
  * 5 MB buffer. This size should ensure good performance -- see.
@@ -41,57 +41,43 @@ const size_t tnlFileGPUvsCPUTransferBufferSize = 5 * 2<<20;
 class File
-   tnlIOMode mode;
+   IOMode mode;
-   FILE* file;
+   std::FILE* file;
    bool fileOK;
    String fileName;
-   size_t writtenElements;
+   std::size_t writtenElements;
-   size_t readElements;
+   std::size_t readElements;
+   ~File();
    bool open( const String& fileName,
-              const tnlIOMode mode );
+              const IOMode mode );
-	const String& getFileName() const
+   const String& getFileName() const
-	   return this->fileName;
+      return this->fileName;
-	long int getReadElements() const
-	{
-	   return this->readElements;
-	}
-	long int getWrittenElements() const
-	{
-	   return this->writtenElements;
-	}
-	// TODO: this does not work for constant types
-#ifdef HAVE_NOT_CXX11
-	template< typename Type, typename Device, typename Index >
-	bool read( Type* buffer,
-	           const Index& elements );
-	template< typename Type, typename Device >
-	bool read( Type* buffer );
+   long int getReadElements() const
+   {
+      return this->readElements;
+   }
-	template< typename Type, typename Device, typename Index >
-	bool write( const Type* buffer,
-	            const Index elements );
+   long int getWrittenElements() const
+   {
+      return this->writtenElements;
+   }
-	template< typename Type, typename Device >
-	bool write( const Type* buffer );
    template< typename Type, typename Device = Devices::Host, typename Index = int >
    bool read( Type* buffer,
               const Index& elements );
@@ -106,12 +92,52 @@ class File
    template< typename Type, typename Device = Devices::Host >
    bool write( const Type* buffer );
-	bool close();
-	static int verbose;
+   bool close();
+   static int verbose;
+   template< typename Type,
+             typename Device,
+             typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type >
+   bool read_impl( Type* buffer,
+                   const std::size_t& elements );
+   template< typename Type,
+             typename Device,
+             typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type,
+             typename = void >
+   bool read_impl( Type* buffer,
+                   const std::size_t& elements );
+   template< typename Type,
+             typename Device,
+             typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
+             typename = void,
+             typename = void >
+   bool read_impl( Type* buffer,
+                   const std::size_t& elements );
+   template< typename Type,
+             typename Device,
+             typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type >
+   bool write_impl( const Type* buffer,
+                    const std::size_t& elements );
+   template< typename Type,
+             typename Device,
+             typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type,
+             typename = void >
+   bool write_impl( const Type* buffer,
+                    const std::size_t& elements );
+   template< typename Type,
+             typename Device,
+             typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
+             typename = void,
+             typename = void >
+   bool write_impl( const Type* buffer,
+                    const std::size_t& elements );
 bool fileExists( const String& fileName );
diff --git a/src/TNL/File_impl.h b/src/TNL/File_impl.h
index a6b746f4880f7ebea1e52aee689c7d4b6fd21fcb..932399045f9e2b9990a6272788bbcd8a6f685aea 100644
--- a/src/TNL/File_impl.h
+++ b/src/TNL/File_impl.h
@@ -8,33 +8,38 @@
 /* See Copyright Notice in tnl/Copyright */
-#pragma once 
+#pragma once
+#include <type_traits>
+#include <TNL/File.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
+#include <TNL/Exceptions/MICSupportMissing.h>
 namespace TNL {
 template< typename Type, typename Device >
-bool File :: read( Type* buffer )
+bool File::read( Type* buffer )
    return read< Type, Device, int >( buffer, 1 );
 template< typename Type, typename Device >
-bool File :: write( const Type* buffer )
+bool File::write( const Type* buffer )
    return write< Type, Device, int >( buffer, 1 );
 template< typename Type, typename Device, typename Index >
-bool File :: read( Type* buffer,
-                   const Index& _elements )
+bool File::read( Type* buffer,
+                 const Index& _elements )
-   TNL_ASSERT( _elements >= 0,
-           std::cerr << " elements = " << _elements << std::endl; );
+   TNL_ASSERT_GE( _elements, 0, "Number of elements to read must be non-negative." );
-   // convert _elements from Index to size_t, which is *unsigned* type
+   // convert _elements from Index to std::size_t, which is *unsigned* type
    // (expected by fread etc)
-   size_t elements = (size_t) _elements;
+   std::size_t elements = (std::size_t) _elements;
    if( ! elements )
       return true;
@@ -43,95 +48,141 @@ bool File :: read( Type* buffer,
       std::cerr << "File " << fileName << " was not properly opened. " << std::endl;
       return false;
-   if( mode != tnlReadMode )
+   if( mode != IOMode::read )
       std::cerr << "File " << fileName << " was not opened for reading. " << std::endl;
       return false;
+   return read_impl< Type, Device >( buffer, elements );
+// Host
+template< typename Type,
+          typename Device,
+          typename >
+bool File::read_impl( Type* buffer,
+                      const std::size_t& elements )
    this->readElements = 0;
-   const size_t host_buffer_size = std::min( tnlFileGPUvsCPUTransferBufferSize / sizeof( Type ),
-                                             elements );
-   void* host_buffer( 0 );
-   if( std::is_same< Device, Devices::Host >::value )
+   if( std::fread( buffer,
+                   sizeof( Type ),
+                   elements,
+                   file ) != elements )
-      if( fread( buffer,
-             sizeof( Type ),
-             elements,
-             file ) != elements )
+      std::cerr << "I am not able to read the data from the file " << fileName << "." << std::endl;
+      std::perror( "Fread ended with the error code" );
+      return false;
+   }
+   this->readElements = elements;
+   return true;
+// Cuda
+template< typename Type,
+          typename Device,
+          typename, typename >
+bool File::read_impl( Type* buffer,
+                      const std::size_t& elements )
+#ifdef HAVE_CUDA
+   this->readElements = 0;
+   const std::size_t host_buffer_size = std::min( tnlFileGPUvsCPUTransferBufferSize / sizeof( Type ), elements );
+   using BaseType = typename std::remove_cv< Type >::type;
+   BaseType* host_buffer = new BaseType[ host_buffer_size ];
+   while( readElements < elements )
+   {
+      std::size_t transfer = std::min( elements - readElements, host_buffer_size );
+      std::size_t transfered = std::fread( host_buffer, sizeof( Type ), transfer, file );
+      if( transfered != transfer )
          std::cerr << "I am not able to read the data from the file " << fileName << "." << std::endl;
-         perror( "Fread ended with the error code" );
+         std::cerr << transfered << " bytes were transfered. " << std::endl;
+         std::perror( "Fread ended with the error code" );
+         delete[] host_buffer;
          return false;
-      this->readElements = elements;
-      return true;
+      cudaMemcpy( ( void* ) & ( buffer[ readElements ] ),
+                  host_buffer,
+                  transfer * sizeof( Type ),
+                  cudaMemcpyHostToDevice );
+      if( ! TNL_CHECK_CUDA_DEVICE )
+      {
+         std::cerr << "Transfer of data from the CUDA device to the file " << this->fileName
+              << " failed." << std::endl;
+         delete[] host_buffer;
+         return false;
+      }
+      this->readElements += transfer;
-   if( std::is_same< Device, Devices::Cuda >::value )
+   delete[] host_buffer;
+   return true;
+   throw Exceptions::CudaSupportMissing();
+// MIC
+template< typename Type,
+          typename Device,
+          typename, typename, typename >
+bool File::read_impl( Type* buffer,
+                      const std::size_t& elements )
+#ifdef HAVE_MIC
+   this->readElements = 0;
+   const std::size_t host_buffer_size = std::min( tnlFileGPUvsCPUTransferBufferSize / sizeof( Type ), elements );
+   Type * host_buffer = (Type *)malloc( sizeof( Type ) * host_buffer_size );
+   readElements = 0;
+   if( ! host_buffer )
-#ifdef HAVE_CUDA
-      /*!***
-       * Here we cannot use
-       *
-       * host_buffer = new Type[ host_buffer_size ];
-       *
-       * because it does not work for constant types like
-       * T = const bool.
-       */
-      host_buffer = malloc( sizeof( Type ) * host_buffer_size );
-      readElements = 0;
-      if( ! host_buffer )
+      std::cerr << "I am sorry but I cannot allocate supporting buffer on the host for writing data from the GPU to the file "
+                << this->getFileName() << "." << std::endl;
+      return false;
+   }
+   while( readElements < elements )
+   {
+      int transfer = std::min(  elements - readElements , host_buffer_size );
+      size_t transfered = fread( host_buffer, sizeof( Type ), transfer, file );
+      if( transfered != transfer )
-         std::cerr << "I am sorry but I cannot allocate supporting buffer on the host for writing data from the GPU to the file "
-              << this->getFileName() << "." << std::endl;
+         std::cerr << "I am not able to read the data from the file " << fileName << "." << std::endl;
+         std::cerr << transfered << " bytes were transfered. " << std::endl;
+         perror( "Fread ended with the error code" );
          return false;
-      while( readElements < elements )
+      Devices::MICHider<Type> device_buff;
+      device_buff.pointer=buffer;
+      #pragma offload target(mic) in(device_buff,readElements) in(host_buffer:length(transfer))
-         size_t transfer = std::min( elements - readElements, host_buffer_size );
-         size_t transfered = fread( host_buffer, sizeof( Type ), transfer, file );
-         if( transfered != transfer )
-         {
-            std::cerr << "I am not able to read the data from the file " << fileName << "." << std::endl;
-            std::cerr << transfered << " bytes were transfered. " << std::endl;
-            perror( "Fread ended with the error code" );
-            return false;
-         }
-         cudaMemcpy( ( void* ) & ( buffer[ readElements ] ),
-                     host_buffer,
-                     transfer * sizeof( Type ),
-                     cudaMemcpyHostToDevice );
-         if( ! checkCudaDevice )
-         {
-            std::cerr << "Transfer of data from the CUDA device to the file " << this->fileName
-                 << " failed." << std::endl;
-            free( host_buffer );
-            return false;
-         }
-         readElements += transfer;
+         /*
+         for(int i=0;i<transfer;i++)
+              device_buff.pointer[readElements+i]=host_buffer[i];
+          */
+         memcpy(&(device_buff.pointer[readElements]),host_buffer, transfer*sizeof(Type) );
-      free( host_buffer );
-      return true;
-      CudaSupportMissingMessage;;
-      return false;
+      readElements += transfer;
+   free( host_buffer );
    return true;
+   throw Exceptions::MICSupportMissing();
 template< class Type, typename Device, typename Index >
-bool File :: write( const Type* buffer,
-                    const Index _elements )
+bool File::write( const Type* buffer,
+                  const Index _elements )
-   TNL_ASSERT( _elements >= 0,
-           std::cerr << " elements = " << _elements << std::endl; );
+   TNL_ASSERT_GE( _elements, 0, "Number of elements to write must be non-negative." );
-   // convert _elements from Index to size_t, which is *unsigned* type
+   // convert _elements from Index to std::size_t, which is *unsigned* type
    // (expected by fread etc)
-   size_t elements = (size_t) _elements;
+   std::size_t elements = (std::size_t) _elements;
    if( ! elements )
       return true;
@@ -140,85 +191,134 @@ bool File :: write( const Type* buffer,
       std::cerr << "File " << fileName << " was not properly opened. " << std::endl;
       return false;
-   if( mode != tnlWriteMode )
+   if( mode != IOMode::write )
       std::cerr << "File " << fileName << " was not opened for writing. " << std::endl;
       return false;
-   Type* buf = const_cast< Type* >( buffer );
-   void* host_buffer( 0 );
+   return write_impl< Type, Device >( buffer, elements );
+// Host
+template< typename Type,
+          typename Device,
+          typename >
+bool File::write_impl( const Type* buffer,
+                       const std::size_t& elements )
+   this->writtenElements = 0;
+   if( std::fwrite( buffer,
+                    sizeof( Type ),
+                    elements,
+                    this->file ) != elements )
+   {
+      std::cerr << "I am not able to write the data to the file " << fileName << "." << std::endl;
+      std::perror( "Fwrite ended with the error code" );
+      return false;
+   }
+   this->writtenElements = elements;
+   return true;
+// Cuda
+template< typename Type,
+          typename Device,
+          typename, typename >
+bool File::write_impl( const Type* buffer,
+                       const std::size_t& elements )
+#ifdef HAVE_CUDA
    this->writtenElements = 0;
-   const size_t host_buffer_size = std::min( tnlFileGPUvsCPUTransferBufferSize / sizeof( Type ),
+   const std::size_t host_buffer_size = std::min( tnlFileGPUvsCPUTransferBufferSize / sizeof( Type ),
                                              elements );
-   if( std::is_same< Device, Devices::Host >::value )
+   using BaseType = typename std::remove_cv< Type >::type;
+   BaseType* host_buffer = new BaseType[ host_buffer_size ];
+   while( this->writtenElements < elements )
-      if( fwrite( buf,
-                  sizeof( Type ),
-                  elements,
-                  this->file ) != elements )
+      std::size_t transfer = std::min( elements - this->writtenElements, host_buffer_size );
+      cudaMemcpy( host_buffer,
+                  ( void* ) & ( buffer[ this->writtenElements ] ),
+                  transfer * sizeof( Type ),
+                  cudaMemcpyDeviceToHost );
+      if( ! TNL_CHECK_CUDA_DEVICE )
+      {
+         std::cerr << "Transfer of data from the file " << this->fileName
+              << " to the CUDA device failed." << std::endl;
+         delete[] host_buffer;
+         return false;
+      }
+      if( std::fwrite( host_buffer,
+                       sizeof( Type ),
+                       transfer,
+                       this->file ) != transfer )
          std::cerr << "I am not able to write the data to the file " << fileName << "." << std::endl;
-         perror( "Fwrite ended with the error code" );
+         std::perror( "Fwrite ended with the error code" );
+         delete[] host_buffer;
          return false;
-      this->writtenElements = elements;
-      return true;
+      this->writtenElements += transfer;
-   if( std::is_same< Device, Devices::Cuda >::value )
+   delete[] host_buffer;
+   return true;
+   throw Exceptions::CudaSupportMissing();
+// MIC
+template< typename Type,
+          typename Device,
+          typename, typename, typename >
+bool File::write_impl( const Type* buffer,
+                       const std::size_t& elements )
+#ifdef HAVE_MIC
+   this->writtenElements = 0;
+   const std::size_t host_buffer_size = std::min( tnlFileGPUvsCPUTransferBufferSize / sizeof( Type ),
+                                                  elements );
+   Type * host_buffer = (Type *)malloc( sizeof( Type ) * host_buffer_size );
+   if( ! host_buffer )
-#ifdef HAVE_CUDA
-         /*!***
-          * Here we cannot use
-          *
-          * host_buffer = new Type[ host_buffer_size ];
-          *
-          * because it does not work for constant types like
-          * T = const bool.
+      std::cerr << "I am sorry but I cannot allocate supporting buffer on the host for writing data from the GPU to the file "
+                << this->getFileName() << "." << std::endl;
+      return false;
+   }
+   while( this->writtenElements < elements )
+   {
+       std::size_t transfer = std::min( elements - this->writtenElements, host_buffer_size );
+      Devices::MICHider<const Type> device_buff;
+      device_buff.pointer=buffer;
+      #pragma offload target(mic) in(device_buff,writtenElements) out(host_buffer:length(transfer))
+      {
+         /*for(int i=0;i<transfer;i++)
+              host_buffer[i]=device_buff.pointer[writtenElements+i];
-         host_buffer = malloc( sizeof( Type ) * host_buffer_size );
-         if( ! host_buffer )
-         {
-            std::cerr << "I am sorry but I cannot allocate supporting buffer on the host for writing data from the GPU to the file "
-                 << this->getFileName() << "." << std::endl;
-            return false;
-         }
-         while( this->writtenElements < elements )
-         {
-            size_t transfer = std::min( elements - this->writtenElements, host_buffer_size );
-            cudaMemcpy( host_buffer,
-                       ( void* ) & ( buffer[ this->writtenElements ] ),
-                       transfer * sizeof( Type ),
-                       cudaMemcpyDeviceToHost );
-            if( ! checkCudaDevice )
-            {
-               std::cerr << "Transfer of data from the file " << this->fileName
-                    << " to the CUDA device failed." << std::endl;
-               free( host_buffer );
-               return false;
-            }
-            if( fwrite( host_buffer,
-                        sizeof( Type ),
-                        transfer,
-                        this->file ) != transfer )
-            {
-               std::cerr << "I am not able to write the data to the file " << fileName << "." << std::endl;
-               perror( "Fwrite ended with the error code" );
-               return false;
-            }
-            this->writtenElements += transfer;
-         }
-         free( host_buffer );
-         return true;
-         CudaSupportMissingMessage;;
+         memcpy(host_buffer,&(device_buff.pointer[writtenElements]), transfer*sizeof(Type) );
+      }
+      if( fwrite( host_buffer,
+                  sizeof( Type ),
+                  transfer,
+                  this->file ) != transfer )
+      {
+         std::cerr << "I am not able to write the data to the file " << fileName << "." << std::endl;
+         perror( "Fwrite ended with the error code" );
          return false;
+      }
+      this->writtenElements += transfer;
+   free( host_buffer );
    return true;
+   throw Exceptions::MICSupportMissing();
 } // namespace TNL
diff --git a/src/TNL/Functions/Analytic/Blob.h b/src/TNL/Functions/Analytic/Blob.h
index 57786beeebf6e9fbfaebed4895e86957f19b9027..e12a27393c7077f71fe57137a6fce3a2abc00d0c 100644
--- a/src/TNL/Functions/Analytic/Blob.h
+++ b/src/TNL/Functions/Analytic/Blob.h
@@ -54,15 +54,9 @@ class Blob< 1, Real > : public BlobBase< Real, 1 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -85,15 +79,9 @@ class Blob< 2, Real > : public BlobBase< Real, 2 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -117,15 +105,9 @@ class Blob< 3, Real > : public BlobBase< Real, 3 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/CMakeLists.txt b/src/TNL/Functions/Analytic/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Functions/Analytic/Constant.h b/src/TNL/Functions/Analytic/Constant.h
index c0c27c19f412d3fda6db952bf34f0d39c9eedb8d..927d5a421320cd91eeaaa759add37126f3242c0e 100644
--- a/src/TNL/Functions/Analytic/Constant.h
+++ b/src/TNL/Functions/Analytic/Constant.h
@@ -27,6 +27,7 @@ class Constant : public Domain< dimensions, NonspaceDomain >
       typedef Real RealType;
       typedef Containers::StaticVector< dimensions, RealType > PointType;
+      __cuda_callable__
       static void configSetup( Config::ConfigDescription& config,
@@ -39,15 +40,9 @@ class Constant : public Domain< dimensions, NonspaceDomain >
       const RealType& getConstant() const;
-   #ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
-   #else
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
-   #endif
       __cuda_callable__ inline
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/Constant_impl.h b/src/TNL/Functions/Analytic/Constant_impl.h
index 21311046be06cba4ce7f36ad4e8839d37482e459..bbd46853dc3cbe98448819876ffc8fc17940cc1a 100644
--- a/src/TNL/Functions/Analytic/Constant_impl.h
+++ b/src/TNL/Functions/Analytic/Constant_impl.h
@@ -16,6 +16,7 @@ namespace Analytic {
 template< int Dimension,
           typename Real >
 Constant< Dimension, Real >::
 : constant( 0.0 )
diff --git a/src/TNL/Functions/Analytic/Cylinder.h b/src/TNL/Functions/Analytic/Cylinder.h
index 831b9fa18f1862949bb794318bc72eebb01f33c6..fb3f0542ceda6b3c1b334c300419549d5217bf2b 100644
--- a/src/TNL/Functions/Analytic/Cylinder.h
+++ b/src/TNL/Functions/Analytic/Cylinder.h
@@ -58,17 +58,10 @@ class Cylinder< 1, Real > : public CylinderBase< Real, 1 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
@@ -92,17 +85,10 @@ class Cylinder< 2, Real > : public CylinderBase< Real, 2 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
@@ -126,17 +112,10 @@ class Cylinder< 3, Real > : public CylinderBase< Real, 3 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/ExpBump.h b/src/TNL/Functions/Analytic/ExpBump.h
index ce82270ccc70cc5b0044a7719ed4eec3d7234e3a..36b07c9a27d549532fb055c01c075cf5e30aa8a8 100644
--- a/src/TNL/Functions/Analytic/ExpBump.h
+++ b/src/TNL/Functions/Analytic/ExpBump.h
@@ -62,15 +62,9 @@ class ExpBump< 1, Real > : public ExpBumpBase< 1, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
    RealType getPartialDerivative( const PointType& v,
                                   const Real& time = 0.0 ) const;
@@ -92,15 +86,9 @@ class ExpBump< 2, Real > : public ExpBumpBase< 2, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
    __cuda_callable__ inline
    RealType getPartialDerivative( const PointType& v,
                                   const Real& time = 0.0 ) const;
@@ -123,15 +111,9 @@ class ExpBump< 3, Real > : public ExpBumpBase< 3, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
    RealType getPartialDerivative( const PointType& v,
                                   const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/Flowerpot.h b/src/TNL/Functions/Analytic/Flowerpot.h
index 2d33a4a9c989b8d0265576edbcc76ca4305418a8..5a42c5f94249aa69c320959a9713f13f88beec56 100644
--- a/src/TNL/Functions/Analytic/Flowerpot.h
+++ b/src/TNL/Functions/Analytic/Flowerpot.h
@@ -58,17 +58,10 @@ class Flowerpot< 1, Real > : public FlowerpotBase< Real, 1 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
@@ -92,17 +85,10 @@ class Flowerpot< 2, Real > : public FlowerpotBase< Real, 2 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
@@ -126,17 +112,10 @@ class Flowerpot< 3, Real > : public FlowerpotBase< Real, 3 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/Paraboloid.h b/src/TNL/Functions/Analytic/Paraboloid.h
index dd15623785837cbac2d35388ba5af5cb6dc6fbb0..7bbdcca53402994574d45e07526e5af0839a1ec7 100644
--- a/src/TNL/Functions/Analytic/Paraboloid.h
+++ b/src/TNL/Functions/Analytic/Paraboloid.h
@@ -67,15 +67,9 @@ class Paraboloid< 1, Real > : public ParaboloidBase< 1, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 1, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -97,15 +91,9 @@ class Paraboloid< 2, Real > : public ParaboloidBase< 2, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 2, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -129,15 +117,9 @@ class Paraboloid< 3, Real > : public ParaboloidBase< 3, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                          const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/ParaboloidSDF.h b/src/TNL/Functions/Analytic/ParaboloidSDF.h
index 20c6662aacb74f482c979cb2e60713109dab000a..60a8cedfd754c1ff350c659475b24bd5d5dedaff 100644
--- a/src/TNL/Functions/Analytic/ParaboloidSDF.h
+++ b/src/TNL/Functions/Analytic/ParaboloidSDF.h
@@ -67,15 +67,9 @@ class ParaboloidSDF< 1, Real > : public ParaboloidSDFBase< 1, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 1, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -94,15 +88,9 @@ class ParaboloidSDF< 2, Real > : public ParaboloidSDFBase< 2, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 2, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -123,15 +111,9 @@ class ParaboloidSDF< 3, Real > : public ParaboloidSDFBase< 3, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                          const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/PseudoSquare.h b/src/TNL/Functions/Analytic/PseudoSquare.h
index 2c027a739367a44fc7cadf141add4abd61f5b8cd..ea4a5ae84e7e306560e67c74f40075cd3cc5a883 100644
--- a/src/TNL/Functions/Analytic/PseudoSquare.h
+++ b/src/TNL/Functions/Analytic/PseudoSquare.h
@@ -54,15 +54,9 @@ class PseudoSquare< 1, Real > : public PseudoSquareBase< Real, 1 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -85,15 +79,9 @@ class PseudoSquare< 2, Real > : public PseudoSquareBase< Real, 2 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -116,15 +104,9 @@ class PseudoSquare< 3, Real > : public PseudoSquareBase< Real, 3 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/SinBumps.h b/src/TNL/Functions/Analytic/SinBumps.h
index 9af5ef739d1e72f629daa4cf6a2649500c5899b7..19b176cc8d48578db7ad3c314b89b30eb66ec760 100644
--- a/src/TNL/Functions/Analytic/SinBumps.h
+++ b/src/TNL/Functions/Analytic/SinBumps.h
@@ -72,15 +72,9 @@ class SinBumps< 1, Real  > : public SinBumpsBase< Containers::StaticVector< 1, R
       bool setup( const Config::ParameterContainer& parameters,
                   const String& prefix = "" );
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -105,15 +99,9 @@ class SinBumps< 2, Real > : public SinBumpsBase< Containers::StaticVector< 2, Re
       bool setup( const Config::ParameterContainer& parameters,
                  const String& prefix = "" );
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -137,15 +125,9 @@ class SinBumps< 3, Real > : public SinBumpsBase< Containers::StaticVector< 3, Re
       bool setup( const Config::ParameterContainer& parameters,
                   const String& prefix = "" );
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                          const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/SinBumpsSDF.h b/src/TNL/Functions/Analytic/SinBumpsSDF.h
index 6f4fe5979889d772814f13407a4e36d46b10387a..e2e242eb4a3bea1fbd784d0923f7f95d4dc0d532 100644
--- a/src/TNL/Functions/Analytic/SinBumpsSDF.h
+++ b/src/TNL/Functions/Analytic/SinBumpsSDF.h
@@ -70,15 +70,9 @@ class SinBumpsSDF< 1, Real  > : public SinBumpsSDFBase< Containers::StaticVector
       bool setup( const Config::ParameterContainer& parameters,
                   const String& prefix = "" );
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -103,15 +97,9 @@ class SinBumpsSDF< 2, Real > : public SinBumpsSDFBase< Containers::StaticVector<
       bool setup( const Config::ParameterContainer& parameters,
                  const String& prefix = "" );
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -135,15 +123,9 @@ class SinBumpsSDF< 3, Real > : public SinBumpsSDFBase< Containers::StaticVector<
       bool setup( const Config::ParameterContainer& parameters,
                   const String& prefix = "" );
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                          const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/SinWave.h b/src/TNL/Functions/Analytic/SinWave.h
index f4b955e2e815ee089df5b31b9efb9bf55245bf06..635125818381d383f9db36f313498887f8f18ac3 100644
--- a/src/TNL/Functions/Analytic/SinWave.h
+++ b/src/TNL/Functions/Analytic/SinWave.h
@@ -68,15 +68,9 @@ class SinWave< 1, Real > : public SinWaveBase< 1, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 1, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -95,15 +89,9 @@ class SinWave< 2, Real > : public SinWaveBase< 2, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 2, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -124,15 +112,9 @@ class SinWave< 3, Real > : public SinWaveBase< 3, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                          const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/SinWaveSDF.h b/src/TNL/Functions/Analytic/SinWaveSDF.h
index b186693f37a6e656d3e01791bd7a69c25a61bdfb..e4db3f3a5832749e2da34d9e516c75d07c595196 100644
--- a/src/TNL/Functions/Analytic/SinWaveSDF.h
+++ b/src/TNL/Functions/Analytic/SinWaveSDF.h
@@ -66,15 +66,9 @@ class SinWaveSDF< 1, Real > : public SinWaveSDFBase< 1, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 1, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -93,15 +87,9 @@ class SinWaveSDF< 2, Real > : public SinWaveSDFBase< 2, Real >
       typedef Real RealType;
       typedef Containers::StaticVector< 2, RealType > PointType;
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                                      const Real& time = 0.0 ) const;
@@ -122,15 +110,9 @@ class SinWaveSDF< 3, Real > : public SinWaveSDFBase< 3, Real >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
       RealType getPartialDerivative( const PointType& v,
                          const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/SinWaveSDF_impl.h b/src/TNL/Functions/Analytic/SinWaveSDF_impl.h
index d7ac240901502c9d3fcfb391da969954dc40609d..d104ca32210896e9103dbd4205670da4202c1940 100644
--- a/src/TNL/Functions/Analytic/SinWaveSDF_impl.h
+++ b/src/TNL/Functions/Analytic/SinWaveSDF_impl.h
@@ -13,8 +13,8 @@
 #include <TNL/Functions/Analytic/SinWaveSDF.h>
 namespace TNL {
-   namespace Functions {
-      namespace Analytic {
+namespace Functions {
+namespace Analytic {
 template< int dimensions, typename Real >
 SinWaveSDFBase< dimensions, Real >::SinWaveSDFBase()
@@ -115,7 +115,7 @@ getPartialDerivative( const PointType& v,
    const RealType distance = ::sqrt( x * x ) + this->phase * this->waveLength / (2.0*M_PI);
    if( XDiffOrder == 0 )
       return this->sinWaveFunctionSDF( distance );
-   TNL_ASSERT( false, std::cerr << "TODO: implement this" );
+   TNL_ASSERT_TRUE( false, "TODO: implement this" );
    return 0.0;
@@ -138,7 +138,7 @@ getPartialDerivative( const PointType& v,
    const RealType distance  = ::sqrt( x * x + y * y ) + this->phase * this->waveLength / (2.0*M_PI);
    if( XDiffOrder == 0 && YDiffOrder == 0)
       return this->sinWaveFunctionSDF( distance );
-   TNL_ASSERT( false, std::cerr << "TODO: implement this" );
+   TNL_ASSERT_TRUE( false, "TODO: implement this" );
    return 0.0;
@@ -158,10 +158,10 @@ getPartialDerivative( const PointType& v,
    const RealType distance  = ::sqrt( x * x +  y * y + z * z ) +  this->phase * this->waveLength / (2.0*M_PI);
    if( XDiffOrder == 0 && YDiffOrder == 0 && ZDiffOrder == 0 )
       return this->sinWaveFunctionSDF( distance );
-   TNL_ASSERT( false, std::cerr << "TODO: implement this" );
+   TNL_ASSERT_TRUE( false, "TODO: implement this" );
    return 0.0;
-      } // namespace Analytic
-   } // namespace Functions
+} // namespace Analytic
+} // namespace Functions
 } // namespace TNL
diff --git a/src/TNL/Functions/Analytic/Twins.h b/src/TNL/Functions/Analytic/Twins.h
index 31ecdd6e4cc8b481f885669ee07bc6aad809d89c..c882ec4eb133c326195151b9d6db07098bec3735 100644
--- a/src/TNL/Functions/Analytic/Twins.h
+++ b/src/TNL/Functions/Analytic/Twins.h
@@ -50,17 +50,10 @@ class Twins< 1, Real > : public TwinsBase< Real, 1 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
@@ -84,17 +77,10 @@ class Twins< 2, Real > : public TwinsBase< Real, 2 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
@@ -118,17 +104,10 @@ class Twins< 3, Real > : public TwinsBase< Real, 3 >
-#ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder,
-                typename Point >
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0,
                 typename Point = PointType >
       RealType getPartialDerivative( const Point& v,
                                      const Real& time = 0.0 ) const;
diff --git a/src/TNL/Functions/Analytic/VectorNorm.h b/src/TNL/Functions/Analytic/VectorNorm.h
index 447b31973815ddbdee4ff99c09c518d7fd75b753..b54513705980249db0037044b00dabeca045bce6 100644
--- a/src/TNL/Functions/Analytic/VectorNorm.h
+++ b/src/TNL/Functions/Analytic/VectorNorm.h
@@ -211,7 +211,7 @@ class VectorNorm< 2, Real > : public VectorNormBase< 2, Real >
             return ( std::pow( std::pow( TNL::abs( x ), this->power ) * this->anisotropy.x() + 
                                std::pow( TNL::abs( y ), this->power ) * this->anisotropy.y(), 1.0 / this-> power ) - this->radius ) * this->multiplicator;
-         TNL_ASSERT( false, std::cerr << "Not implemented yet." << std::endl );
+         TNL_ASSERT_TRUE( false, "Not implemented yet." );
          return 0.0;
@@ -262,7 +262,7 @@ class VectorNorm< 3, Real > : public VectorNormBase< 3, Real >
                                std::pow( TNL::abs( y ), this->power ) * this->anisotropy.y() +
                                std::pow( TNL::abs( z ), this->power ) * this->anisotropy.z(), 1.0 / this-> power ) - this->radius ) * this->multiplicator;
-         TNL_ASSERT( false, std::cerr << "Not implemented yet." << std::endl );
+         TNL_ASSERT_TRUE( false, "Not implemented yet." );
          return 0.0;
diff --git a/src/TNL/Functions/CMakeLists.txt b/src/TNL/Functions/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Functions/FunctionAdapter.h b/src/TNL/Functions/FunctionAdapter.h
index 2128ce811d135c28262b8b8b0c684d7016cfcaba..425996f5282e26cdf345c9543a5bb24c50ef2575 100644
--- a/src/TNL/Functions/FunctionAdapter.h
+++ b/src/TNL/Functions/FunctionAdapter.h
@@ -29,7 +29,7 @@ class FunctionAdapter
       typedef Function FunctionType;
       typedef Mesh MeshType;
       typedef typename FunctionType::RealType  RealType;
-      typedef typename MeshType::IndexType     IndexType;
+      typedef typename MeshType::GlobalIndexType     IndexType;
       //typedef typename FunctionType::PointType PointType;
       template< typename MeshPointer >
@@ -64,7 +64,7 @@ class FunctionAdapter< Mesh, Function, SpaceDomain >
       typedef Function FunctionType;
       typedef Mesh MeshType;
       typedef typename FunctionType::RealType  RealType;
-      typedef typename MeshType::IndexType     IndexType;
+      typedef typename MeshType::GlobalIndexType     IndexType;
       typedef typename FunctionType::PointType PointType;
       template< typename MeshPointer >
@@ -101,7 +101,7 @@ class FunctionAdapter< Mesh, Function, NonspaceDomain >
       typedef Function FunctionType;
       typedef Mesh MeshType;
       typedef typename FunctionType::RealType  RealType;
-      typedef typename MeshType::IndexType     IndexType;
+      typedef typename MeshType::GlobalIndexType     IndexType;
       typedef typename FunctionType::PointType PointType;
       template< typename MeshPointer >
@@ -137,7 +137,7 @@ class FunctionAdapter< Mesh, Function, MeshFunction >
       typedef Function FunctionType;
       typedef Mesh MeshType;
       typedef typename FunctionType::RealType  RealType;
-      typedef typename MeshType::IndexType     IndexType;
+      typedef typename MeshType::GlobalIndexType     IndexType;
       template< typename EntityType >
       __cuda_callable__ inline
@@ -161,7 +161,7 @@ class FunctionAdapter< Mesh, Function, SpaceDomain >
       typedef Function FunctionType;
       typedef Mesh MeshType;
       typedef typename FunctionType::RealType  RealType;
-      typedef typename MeshType::IndexType     IndexType;
+      typedef typename MeshType::GlobalIndexType     IndexType;
       typedef typename FunctionType::PointType PointType;
       template< typename EntityType >
@@ -186,7 +186,7 @@ class FunctionAdapter< Mesh, Function, SpaceDomain >
       typedef Function FunctionType;
       typedef Mesh MeshType;
       typedef typename FunctionType::RealType  RealType;
-      typedef typename MeshType::IndexType     IndexType;
+      typedef typename MeshType::GlobalIndexType     IndexType;
       typedef typename FunctionType::PointType PointType;
       template< typename EntityType >
diff --git a/src/TNL/Functions/MeshFunction.h b/src/TNL/Functions/MeshFunction.h
index b7744cb3d37327707a9324e6eeb14f4e77da9d47..2c8d41a564c3af16ad406fbf9dca5cf95f3e98e5 100644
--- a/src/TNL/Functions/MeshFunction.h
+++ b/src/TNL/Functions/MeshFunction.h
@@ -20,11 +20,11 @@ namespace TNL {
 namespace Functions {   
 template< typename Mesh,
-          int MeshEntityDimension = Mesh::meshDimension,
+          int MeshEntityDimension = Mesh::getMeshDimension(),
           typename Real = typename Mesh::RealType >
 class MeshFunction :
    public Object,
-   public Domain< Mesh::meshDimension, MeshDomain >
+   public Domain< Mesh::getMeshDimension(), MeshDomain >
    //static_assert( Mesh::DeviceType::DeviceType == Vector::DeviceType::DeviceType,
    //               "Both mesh and vector of a mesh function must reside on the same device.");
@@ -32,7 +32,7 @@ class MeshFunction :
       typedef Mesh MeshType;
       typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::IndexType IndexType;
+      typedef typename MeshType::GlobalIndexType IndexType;
       typedef SharedPointer< MeshType > MeshPointer;      
       typedef Real RealType;
       typedef Containers::Vector< RealType, DeviceType, IndexType > VectorType;
@@ -40,7 +40,7 @@ class MeshFunction :
       static constexpr int getEntitiesDimension() { return MeshEntityDimension; }
-      static constexpr int getMeshDimensions() { return MeshType::getMeshDimension(); }
+      static constexpr int getMeshDimension() { return MeshType::getMeshDimension(); }
@@ -94,7 +94,7 @@ class MeshFunction :
       const MeshPointer& getMeshPointer() const;
-      __cuda_callable__ static IndexType getDofs( const MeshPointer& meshPointer );
+      static IndexType getDofs( const MeshPointer& meshPointer );
       __cuda_callable__ const VectorType& getData() const;      
diff --git a/src/TNL/Functions/MeshFunctionEvaluator.h b/src/TNL/Functions/MeshFunctionEvaluator.h
index ad08b901404b1110c81943ffd753984e9ebe9aaa..f4f544d1769523087c396b5691a1be1526bc6d5f 100644
--- a/src/TNL/Functions/MeshFunctionEvaluator.h
+++ b/src/TNL/Functions/MeshFunctionEvaluator.h
@@ -10,7 +10,6 @@
 #pragma once
-#include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/MeshFunction.h>
 #include <TNL/Functions/OperatorFunction.h>
 #include <TNL/Functions/FunctionAdapter.h>
@@ -162,4 +161,3 @@ class MeshFunctionEvaluatorAdditionEntitiesProcessor
 } // namespace TNL
 #include <TNL/Functions/MeshFunctionEvaluator_impl.h>
diff --git a/src/TNL/Functions/MeshFunctionEvaluator_impl.h b/src/TNL/Functions/MeshFunctionEvaluator_impl.h
index 408e0882fc1e354ae87d89e1859fc61150fd0049..540ae7077b73baab728c215c425b6bedfc478f1e 100644
--- a/src/TNL/Functions/MeshFunctionEvaluator_impl.h
+++ b/src/TNL/Functions/MeshFunctionEvaluator_impl.h
@@ -115,7 +115,7 @@ evaluateEntities( OutMeshFunctionPointer& meshFunction,
    static_assert( std::is_same< typename std::decay< typename OutMeshFunctionPointer::ObjectType >::type, OutMeshFunction >::value, "expected a smart pointer" );
    static_assert( std::is_same< typename std::decay< typename InFunctionPointer::ObjectType >::type, InFunction >::value, "expected a smart pointer" );
-   typedef typename MeshType::template MeshEntity< OutMeshFunction::getEntitiesDimension() > MeshEntityType;
+   typedef typename MeshType::template EntityType< OutMeshFunction::getEntitiesDimension() > MeshEntityType;
    typedef Functions::MeshFunctionEvaluatorAssignmentEntitiesProcessor< MeshType, TraverserUserData > AssignmentEntitiesProcessor;
    typedef Functions::MeshFunctionEvaluatorAdditionEntitiesProcessor< MeshType, TraverserUserData > AdditionEntitiesProcessor;
    //typedef typename OutMeshFunction::MeshPointer OutMeshPointer;
@@ -171,4 +171,3 @@ evaluateEntities( OutMeshFunctionPointer& meshFunction,
 } // namespace Functions
 } // namespace TNL
diff --git a/src/TNL/Functions/MeshFunctionNormGetter.h b/src/TNL/Functions/MeshFunctionNormGetter.h
index 6f7e127c173b0dc0b67f51f800ced2f9d333cc6a..50e39c6de767fa79828d7e4bc9c488356c1d5be6 100644
--- a/src/TNL/Functions/MeshFunctionNormGetter.h
+++ b/src/TNL/Functions/MeshFunctionNormGetter.h
@@ -134,7 +134,7 @@ class MeshFunctionNormGetter< MeshFunction< Meshes::Grid< Dimension, MeshReal, D
          if( EntityDimension > 0 )
-            TNL_ASSERT( false, std::cerr << "Not implemented yet." << std::endl );
+            TNL_ASSERT_TRUE( false, "Not implemented yet." );
          if( p == 1.0 )
diff --git a/src/TNL/Functions/MeshFunctionVTKWriter_impl.h b/src/TNL/Functions/MeshFunctionVTKWriter_impl.h
index 56f1d4cdd6ba3180df071209fae4e27b52fb1e92..606eac9ce348e2260109845af0bb05cea064a3e8 100644
--- a/src/TNL/Functions/MeshFunctionVTKWriter_impl.h
+++ b/src/TNL/Functions/MeshFunctionVTKWriter_impl.h
@@ -279,8 +279,8 @@ write( const MeshFunctionType& function,
        std::ostream& str,
        const double& scale )
-   typedef typename MeshType::template MeshEntity< 0 > Vertex;
-   typedef typename MeshType::template MeshEntity< 1 > Face;
+   typedef typename MeshType::template EntityType< 0 > Vertex;
+   typedef typename MeshType::template EntityType< 1 > Face;
    writeHeader(function, str);
    const MeshType& mesh = function.getMesh();
@@ -369,7 +369,7 @@ write( const MeshFunctionType& function,
        std::ostream& str,
        const double& scale )
-   typedef typename MeshType::template MeshEntity< 0 > Vertex;
+   typedef typename MeshType::template EntityType< 0 > Vertex;
    writeHeader(function, str);
    const MeshType& mesh = function.getMesh();
diff --git a/src/TNL/Functions/MeshFunction_impl.h b/src/TNL/Functions/MeshFunction_impl.h
index 2ab5a06457527cb9bad9b89df70ed59e0f941175..7164170f1ab51a20bcdeaeb9011cb97bb97b4590 100644
--- a/src/TNL/Functions/MeshFunction_impl.h
+++ b/src/TNL/Functions/MeshFunction_impl.h
@@ -36,10 +36,10 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 MeshFunction( const MeshPointer& meshPointer )
 : meshPointer( meshPointer )
-   this->data.setSize( meshPointer->template getEntitiesCount< typename Mesh::template MeshEntity< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );
+   this->data.setSize( getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );
 template< typename Mesh,
@@ -62,10 +62,10 @@ MeshFunction( const MeshPointer& meshPointer,
               const IndexType& offset )
 : meshPointer( meshPointer )
-   this->data.bind( data, offset, meshPointer->template getEntitiesCount< typename Mesh::template MeshEntity< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );   
+   this->data.bind( data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );   
@@ -79,10 +79,10 @@ MeshFunction( const MeshPointer& meshPointer,
               const IndexType& offset )
 : meshPointer( meshPointer )
-   this->data.bind( *data, offset, meshPointer->template getEntitiesCount< typename Mesh::template MeshEntity< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );   
+   this->data.bind( *data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );   
 template< typename Mesh,
@@ -191,10 +191,10 @@ bind( const MeshPointer& meshPointer,
       const IndexType& offset )
    this->meshPointer = meshPointer;
-   this->data.bind( data, offset, meshPointer->template getEntitiesCount< typename Mesh::template MeshEntity< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );   
+   this->data.bind( data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );   
 template< typename Mesh,
@@ -208,10 +208,10 @@ bind( const MeshPointer& meshPointer,
       const IndexType& offset )
    this->meshPointer = meshPointer;
-   this->data.bind( *data, offset, meshPointer->template getEntitiesCount< typename Mesh::template MeshEntity< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );   
+   this->data.bind( *data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );   
@@ -223,10 +223,10 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 setMesh( const MeshPointer& meshPointer )
    this->meshPointer = meshPointer;
-   this->data.setSize( meshPointer->template getEntitiesCount< typename Mesh::template MeshEntity< MeshEntityDimension > >() );
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );   
+   this->data.setSize( getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );   
 template< typename Mesh,
@@ -254,7 +254,6 @@ getMeshPointer() const
 template< typename Mesh,
           int MeshEntityDimension,
           typename Real >
 typename MeshFunction< Mesh, MeshEntityDimension, Real >::IndexType
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 getDofs( const MeshPointer& meshPointer )
@@ -312,7 +311,7 @@ typename Functions::MeshFunction< Mesh, MeshEntityDimension, Real >::RealType
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 getValue( const EntityType& meshEntity ) const
-   static_assert( EntityType::entityDimension == MeshEntityDimension, "Calling with wrong EntityType -- entity dimension do not match." );
+   static_assert( EntityType::getEntityDimension() == MeshEntityDimension, "Calling with wrong EntityType -- entity dimensions do not match." );
    return this->data.getValue( meshEntity.getIndex() );
@@ -325,7 +324,7 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 setValue( const EntityType& meshEntity,
           const RealType& value )
-   static_assert( EntityType::entityDimension == MeshEntityDimension, "Calling with wrong EntityType -- entity dimension do not match." );
+   static_assert( EntityType::getEntityDimension() == MeshEntityDimension, "Calling with wrong EntityType -- entity dimensions do not match." );
    this->data.setValue( meshEntity.getIndex(), value );
@@ -339,7 +338,7 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 operator()( const EntityType& meshEntity,
             const RealType& time )
-   static_assert( EntityType::entityDimension == MeshEntityDimension, "Calling with wrong EntityType -- entity dimension do not match." );
+   static_assert( EntityType::getEntityDimension() == MeshEntityDimension, "Calling with wrong EntityType -- entity dimensions do not match." );
    return this->data[ meshEntity.getIndex() ];
@@ -353,7 +352,7 @@ MeshFunction< Mesh, MeshEntityDimension, Real >::
 operator()( const EntityType& meshEntity,
             const RealType& time ) const
-   static_assert( EntityType::entityDimension == MeshEntityDimension, "Calling with wrong EntityType -- entity dimension do not match." );
+   static_assert( EntityType::getEntityDimension() == MeshEntityDimension, "Calling with wrong EntityType -- entity dimensions do not match." );
    return this->data[ meshEntity.getIndex() ];
@@ -448,9 +447,9 @@ bool
 MeshFunction< Mesh, MeshEntityDimension, Real >::
 save( File& file ) const
-   TNL_ASSERT( this->data.getSize() == this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >(), 
-      std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
-                << "this->mesh->template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() = " << this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >() );
+   TNL_ASSERT( this->data.getSize() == this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >(), 
+               std::cerr << "this->data.getSize() = " << this->data.getSize() << std::endl
+                         << "this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() = " << this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >() );
    if( ! Object::save( file ) )
       return false;
    return this->data.save( file );
@@ -467,7 +466,7 @@ load( File& file )
       return false;
    if( ! this->data.load( file ) )
       return false;
-   const IndexType meshSize = this->meshPointer.getData().template getEntitiesCount< typename MeshType::template MeshEntity< MeshEntityDimension > >();
+   const IndexType meshSize = this->getMesh().template getEntitiesCount< typename MeshType::template EntityType< MeshEntityDimension > >();
    if( this->data.getSize() != meshSize )
       std::cerr << "Size of the data loaded to the mesh function (" << this->data.getSize() << ") does not fit with the mesh size (" << meshSize << ")." << std::endl;
diff --git a/src/TNL/Functions/OperatorFunction.h b/src/TNL/Functions/OperatorFunction.h
index 4ae8907daaee25d9c8d3305e0c447f44691f1420..c5dbde42363c27f2d58d53018d7efb41096b76ad 100644
--- a/src/TNL/Functions/OperatorFunction.h
+++ b/src/TNL/Functions/OperatorFunction.h
@@ -87,13 +87,13 @@ class OperatorFunction< Operator, MeshFunctionT, void, true, IsAnalytic >
       const MeshType& getMesh() const
-         TNL_ASSERT( this->preimageFunction, std::cerr << "The preimage function was not set." << std::endl );
+         TNL_ASSERT_TRUE( this->preimageFunction, "The preimage function was not set." );
          return this->preimageFunction->getMesh();
       const MeshPointer& getMeshPointer() const
-         TNL_ASSERT( this->preimageFunction, std::cerr << "The preimage function was not set." << std::endl );
+         TNL_ASSERT_TRUE( this->preimageFunction, "The preimage function was not set." );
          return this->preimageFunction->getMeshPointer(); 
@@ -114,7 +114,7 @@ class OperatorFunction< Operator, MeshFunctionT, void, true, IsAnalytic >
          const MeshEntity& meshEntity,
          const RealType& time = 0.0 ) const
-         TNL_ASSERT( this->preimageFunction, std::cerr << "The preimage function was not set." << std::endl );
+         TNL_ASSERT_TRUE( this->preimageFunction, "The preimage function was not set." );
          return operator_( *preimageFunction, meshEntity, time );
@@ -298,13 +298,13 @@ class OperatorFunction< Operator, Function, BoundaryConditions, false, IsAnalyti
       const PreimageFunctionType& getPreimageFunction() const
-         TNL_ASSERT( this->preimageFunction, );
+         TNL_ASSERT_TRUE( this->preimageFunction, "The preimage function was not set." );
          return *this->preimageFunction;
       PreimageFunctionType& getPreimageFunction()
-         TNL_ASSERT( this->preimageFunction, );
+         TNL_ASSERT_TRUE( this->preimageFunction, "The preimage function was not set." );
          return *this->preimageFunction;
diff --git a/src/TNL/Functions/TestFunction.h b/src/TNL/Functions/TestFunction.h
index d01c9df2c547f11f8b059a6f0ec6b8b42cc166c2..0f97ff21af51b74656afa4e9f2d554f026f20290 100644
--- a/src/TNL/Functions/TestFunction.h
+++ b/src/TNL/Functions/TestFunction.h
@@ -65,15 +65,9 @@ class TestFunction : public Domain< FunctionDimension, SpaceDomain >
       const TestFunction& operator = ( const TestFunction& function );
-   #ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
-   #else
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
-   #endif
       Real getPartialDerivative( const PointType& vertex,
                                  const Real& time = 0 ) const;
@@ -86,27 +80,12 @@ class TestFunction : public Domain< FunctionDimension, SpaceDomain >
-   #ifdef HAVE_NOT_CXX11
-      template< int XDiffOrder,
-                int YDiffOrder,
-                int ZDiffOrder >
-   #else
       template< int XDiffOrder = 0,
                 int YDiffOrder = 0,
                 int ZDiffOrder = 0 >
-   #endif
       Real getTimeDerivative( const PointType& vertex,
                               const Real& time = 0 ) const;
-   #ifdef HAVE_NOT_CXX11
-      template< typename Point >
-      __cuda_callable__
-      Real getTimeDerivative( const Point& vertex,
-                              const Real& time = 0 ) const
-      {
-         return this->getTimeDerivative< 0, 0, 0, Point >( vertex, time );
-      }
-   #endif
       std::ostream& print( std::ostream& str ) const;
diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h
index 2d31c65102801c4cb082419bf65141bdc7df6494..34ae43529b92cadb3f23ca4f57176255c9d09d45 100644
--- a/src/TNL/Functions/TestFunction_impl.h
+++ b/src/TNL/Functions/TestFunction_impl.h
@@ -131,7 +131,7 @@ setupFunction( const Config::ParameterContainer& parameters,
       this->function = Devices::Cuda::passToDevice( *auxFunction );
       delete auxFunction;
-      if( ! checkCudaDevice )
+      if( ! TNL_CHECK_CUDA_DEVICE )
          return false;
    return true;
@@ -161,7 +161,7 @@ setupOperator( const Config::ParameterContainer& parameters,
       this->operator_ = Devices::Cuda::passToDevice( *auxOperator );
       delete auxOperator;
-      if( ! checkCudaDevice )
+      if( ! TNL_CHECK_CUDA_DEVICE )
          return false;
    return true;
@@ -602,66 +602,66 @@ getTimeDerivative( const PointType& vertex,
       case constant:
          typedef Constant< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case paraboloid:
          typedef Paraboloid< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case expBump:
          typedef ExpBump< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case sinBumps:
          typedef SinBumps< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case sinWave:
          typedef SinWave< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case cylinder:
          typedef Cylinder< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case flowerpot:
          typedef Flowerpot< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case twins:
          typedef Twins< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case pseudoSquare:
          typedef PseudoSquare< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case blob:
          typedef Blob< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
@@ -670,19 +670,19 @@ getTimeDerivative( const PointType& vertex,
       case paraboloidSDF:
          typedef ParaboloidSDF< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case sinBumpsSDF:
          typedef SinBumpsSDF< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
       case sinWaveSDF:
          typedef SinWaveSDF< Dimension, Real > FunctionType;
-         return scale * ( ( FunctionType* ) function )->
+         return scale * ( ( FunctionType* ) function )->template
                   getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
diff --git a/src/TNL/Functions/VectorField.h b/src/TNL/Functions/VectorField.h
index ad31e09d8d571b96571b98e6216765819a59c559..2fe085e750c931d5d81cd1dbdf2c49fd9af38ad9 100644
--- a/src/TNL/Functions/VectorField.h
+++ b/src/TNL/Functions/VectorField.h
@@ -72,11 +72,11 @@ class VectorField
 template< int Size,
           typename Mesh,
-          int MeshEntityDimensions,
+          int MeshEntityDimension,
           typename Real >
-class VectorField< Size, MeshFunction< Mesh, MeshEntityDimensions, Real > >
-: public Functions::Domain< MeshFunction< Mesh, MeshEntityDimensions, Real >::getDomainDimension(), 
-                            MeshFunction< Mesh, MeshEntityDimensions, Real >::getDomainType() >,
+class VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, Real > >
+: public Functions::Domain< MeshFunction< Mesh, MeshEntityDimension, Real >::getDomainDimension(), 
+                            MeshFunction< Mesh, MeshEntityDimension, Real >::getDomainType() >,
    public Object
@@ -84,11 +84,11 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimensions, Real > >
       typedef Mesh MeshType;
       typedef Real RealType;
       typedef SharedPointer< MeshType > MeshPointer;
-      typedef MeshFunction< MeshType, MeshEntityDimensions, RealType > FunctionType;
+      typedef MeshFunction< MeshType, MeshEntityDimension, RealType > FunctionType;
       typedef SharedPointer< FunctionType > FunctionPointer;
       typedef typename MeshType::DeviceType DeviceType;
       typedef typename MeshType::IndexType IndexType;
-      typedef VectorField< Size, MeshFunction< Mesh, MeshEntityDimensions, RealType > > ThisType;
+      typedef VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, RealType > > ThisType;
       typedef Containers::StaticVector< Size, RealType > VectorType;
       static void configSetup( Config::ConfigDescription& config,
@@ -182,6 +182,7 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimensions, Real > >
          VectorType v;
          for( int i = 0; i < Size; i++ )
+            // FIXME: the dereferencing operator of FunctionPointer is not __cuda_callable__
             v[ i ] = ( *this->vectorField[ i ] )[ index ];
          return v;
@@ -192,6 +193,7 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimensions, Real > >
          VectorType v;
          for( int i = 0; i < Size; i++ )
+            // FIXME: the dereferencing operator of FunctionPointer is not __cuda_callable__
             v[ i ] = ( *this->vectorField[ i ] )( meshEntity );
          return v;
@@ -260,14 +262,14 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimensions, Real > >
-template< int Dimensions,
+template< int Dimension,
           typename Function >
-std::ostream& operator << ( std::ostream& str, const VectorField< Dimensions, Function >& f )
+std::ostream& operator << ( std::ostream& str, const VectorField< Dimension, Function >& f )
-   for( int i = 0; i < Dimensions; i++ )
+   for( int i = 0; i < Dimension; i++ )
       str << "[ " << f[ i ] << " ]";
-      if( i < Dimensions - 1 )
+      if( i < Dimension - 1 )
          str << ", ";
    return str;
diff --git a/src/TNL/Math.h b/src/TNL/Math.h
index 1ff7cc923c8eed40b6676bed16e8371137b88c04..ca15bb876d6da3accedb65f7b9c729a0b6d7cda9 100644
--- a/src/TNL/Math.h
+++ b/src/TNL/Math.h
@@ -12,159 +12,108 @@
 #include <cmath>
 #include <type_traits>
+#include <algorithm>
-#include <TNL/Devices/Cuda.h>
-#ifdef HAVE_CUDA
-#include <cuda.h>
+#include <TNL/Devices/CudaCallable.h>
 namespace TNL {
 template< typename T1, typename T2 >
 using enable_if_same_base = std::enable_if< std::is_same< typename std::decay< T1 >::type, T2 >::value, T2 >;
+template< typename T1, typename T2 >
+using both_integral_or_floating = typename std::conditional<
+         ( std::is_integral< T1 >::value && std::is_integral< T2 >::value ) ||
+         ( std::is_floating_point< T1 >::value && std::is_floating_point< T2 >::value ),
+   std::true_type,
+   std::false_type >::type;
+// 1. If both types are integral or floating-point, the larger type is selected.
+// 2. If one type is integral and the other floating-point, the floating-point type is selected.
+// This is necessary only due to the limitations of nvcc. Note that clang and gcc
+// can handle automatic promotion using a single-type template, exactly like
+// std::min and std::max are implemented in STL.
+template< typename T1, typename T2 >
+using larger_type = typename std::conditional<
+         ( both_integral_or_floating< T1, T2 >::value && sizeof(T1) >= sizeof(T2) ) ||
+         std::is_floating_point<T1>::value,
+   T1, T2 >::type;
  * This function returns minimum of two numbers.
- * Specializations use the functions defined in the CUDA's math_functions.h
- * in CUDA device code and STL functions otherwise.
+ * GPU device code uses the functions defined in the CUDA's math_functions.h,
+ * MIC uses trivial override and host uses the STL functions.
-template< typename Type1, typename Type2 >
+template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
 __cuda_callable__ inline
-Type1 min( const Type1& a, const Type2& b )
+ResultType min( const T1& a, const T2& b )
+#if defined(__CUDA_ARCH__)
+   return ::min( (ResultType) a, (ResultType) b );
+#elif defined(__MIC__)
    return a < b ? a : b;
-// specialization for int
-template< class T >
-__cuda_callable__ inline
-typename enable_if_same_base< T, int >::type
-min( const T& a, const T& b )
-#ifdef __CUDA_ARCH__
-   return ::min( a, b );
-   return std::min( a, b );
-// specialization for float
-template< class T >
-__cuda_callable__ inline
-typename enable_if_same_base< T, float >::type
-min( const T& a, const T& b )
-#ifdef __CUDA_ARCH__
-   return ::fminf( a, b );
-   return std::fmin( a, b );
-// specialization for double
-template< class T >
-__cuda_callable__ inline
-typename enable_if_same_base< T, double >::type
-min( const T& a, const T& b )
-#ifdef __CUDA_ARCH__
-   return ::fmin( a, b );
-   return std::fmin( a, b );
+   return std::min( (ResultType) a, (ResultType) b );
  * This function returns maximum of two numbers.
- * Specializations use the functions defined in the CUDA's math_functions.h
- * in CUDA device code and STL functions otherwise.
+ * GPU device code uses the functions defined in the CUDA's math_functions.h,
+ * MIC uses trivial override and host uses the STL functions.
-template< typename Type1, typename Type2 >
+template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
-Type1 max( const Type1& a, const Type2& b )
+ResultType max( const T1& a, const T2& b )
+#if defined(__CUDA_ARCH__)
+   return ::max( (ResultType) a, (ResultType) b );
+#elif defined(__MIC__)
    return a > b ? a : b;
-// specialization for int
-template< class T >
-__cuda_callable__ inline
-typename enable_if_same_base< T, int >::type
-max( const T& a, const T& b )
-#ifdef __CUDA_ARCH__
-   return ::max( a, b );
-   return std::max( a, b );
-// specialization for float
-template< class T >
-__cuda_callable__ inline
-typename enable_if_same_base< T, float >::type
-max( const T& a, const T& b )
-#ifdef __CUDA_ARCH__
-   return ::fmaxf( a, b );
-   return std::fmax( a, b );
-// specialization for double
-template< class T >
-__cuda_callable__ inline
-typename enable_if_same_base< T, double >::type
-max( const T& a, const T& b )
-#ifdef __CUDA_ARCH__
-   return ::fmax( a, b );
-   return std::fmax( a, b );
+   return std::max( (ResultType) a, (ResultType) b );
  * This function returns absolute value of given number.
- * Specializations use the functions defined in the CUDA's math_functions.h
- * in CUDA device code and STL functions otherwise.
 template< class T >
 __cuda_callable__ inline
-typename std::enable_if< ! std::is_arithmetic< T >::value, T >::type
-abs( const T& n )
+T abs( const T& n )
+#if defined(__MIC__)
    if( n < ( T ) 0 )
       return -n;
    return n;
+   return std::abs( n );
-// specialization for any arithmetic type (e.g. int, float, double)
-template< class T >
+template< typename T1, typename T2, typename ResultType = larger_type< T1, T2 > >
 __cuda_callable__ inline
-typename std::enable_if< std::is_arithmetic< T >::value, T >::type
-abs( const T& n )
+ResultType pow( const T1& base, const T2& exp )
-#ifdef __CUDA_ARCH__
-   return ::abs( n );
+#if defined(__CUDA_ARCH__) || defined(__MIC__)
+   return ::pow( (ResultType) base, (ResultType) exp );
-   return std::abs( n );
+   return std::pow( (ResultType) base, (ResultType) exp );
-template< class T >
+template< typename T >
 __cuda_callable__ inline
-T pow( const T& base, const T& exp )
+T sqrt( const T& value )
-#ifdef __CUDA_ARCH__
-   return ::pow( base, exp );
+#if defined(__CUDA_ARCH__) || defined(__MIC__)
+   return ::sqrt( value );
-   return std::pow( base, exp );
+   return std::sqrt( value );
@@ -176,7 +125,7 @@ void swap( Type& a, Type& b )
    Type tmp( a );
    a = b;
    b = tmp;
 template< class T >
@@ -185,7 +134,7 @@ T sign( const T& a )
    if( a < ( T ) 0 ) return ( T ) -1;
    if( a == ( T ) 0 ) return ( T ) 0;
    return ( T ) 1;
 template< typename Real >
@@ -220,4 +169,3 @@ inline bool isPow2( long int x )
 } // namespace TNL
diff --git a/src/TNL/Matrices/CMakeLists.txt b/src/TNL/Matrices/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h
index bc06d7c2c0872744befb483db48c5b657a7d2f9e..e3d6253e8a09d27404f78801ef44c7ee2699ba81 100644
--- a/src/TNL/Matrices/CSR.h
+++ b/src/TNL/Matrices/CSR.h
@@ -30,7 +30,16 @@ class CSRDeviceDependentCode;
 template< typename Real, typename Device = Devices::Host, typename Index = int >
 class CSR : public Sparse< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class CSR;
    typedef Real RealType;
    typedef Device DeviceType;
@@ -52,15 +61,22 @@ class CSR : public Sparse< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const CSR< Real2, Device2, Index2 >& matrix );
+   void setLike( const CSR< Real2, Device2, Index2 >& matrix );
    void reset();
@@ -154,6 +170,14 @@ class CSR : public Sparse< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   CSR& operator=( const CSR& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
    bool save( File& file ) const;
    bool load( File& file );
@@ -201,37 +225,43 @@ class CSR : public Sparse< Real, Device, Index >
    // The following getters allow us to interface TNL with external C-like
    // libraries such as UMFPACK or SuperLU, which need the raw data.
-   Index* getRowPointers()
+   const Containers::Vector< Index, Device, Index >&
+   getRowPointers() const
-       return this->rowPointers.getData();
+      return this->rowPointers;
-   const Index* getRowPointers() const
+   Containers::Vector< Index, Device, Index >&
+   getRowPointers()
-       return this->rowPointers.getData();
+      return this->rowPointers;
-   Index* getColumnIndexes()
+   const Containers::Vector< Index, Device, Index >&
+   getColumnIndexes() const
-       return this->columnIndexes.getData();
+      return this->columnIndexes;
-   const Index* getColumnIndexes() const
+   Containers::Vector< Index, Device, Index >&
+   getColumnIndexes()
-       return this->columnIndexes.getData();
+      return this->columnIndexes;
-   Real* getValues()
+   const Containers::Vector< Real, Device, Index >&
+   getValues() const
-       return this->values.getData();
+      return this->values;
-   const Real* getValues() const
+   Containers::Vector< Real, Device, Index >&
+   getValues()
-       return this->values.getData();
+      return this->values;
-   protected:
    Containers::Vector< Index, Device, Index > rowPointers;
@@ -248,4 +278,3 @@ class CSR : public Sparse< Real, Device, Index >
 } // namespace TNL
 #include <TNL/Matrices/CSR_impl.h>
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 6084df88a8afb4c7bb2389e356e438df71059b4c..0b47633ea9a78b906822bc1e0466e196846dd774 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -12,7 +12,6 @@
 #include <TNL/Matrices/CSR.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/SharedVector.h>
 #include <TNL/Math.h>
@@ -61,30 +60,46 @@ String CSR< Real, Device, Index >::getTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index >
-bool CSR< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                         const IndexType columns )
+String CSR< Real, Device, Index >::getSerializationType()
-   if( ! Sparse< Real, Device, Index >::setDimensions( rows, columns ) ||
-       ! this->rowPointers.setSize( this->rows + 1 ) )
-      return false;
+   return HostType::getType();
+template< typename Real,
+          typename Device,
+          typename Index >
+String CSR< Real, Device, Index >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index >
+void CSR< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                const IndexType columns )
+   Sparse< Real, Device, Index >::setDimensions( rows, columns );
+   this->rowPointers.setSize( this->rows + 1 );
    this->rowPointers.setValue( 0 );
-   return true;
 template< typename Real,
           typename Device,
           typename Index >
-bool CSR< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void CSR< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+   TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
     * Compute the rows pointers. The last one is
     * the end of the last row and so it says the
     * necessary length of the vectors this->values
     * and this->columnIndexes.
-   TNL_ASSERT( this->getRows() > 0, );
-   TNL_ASSERT( this->getColumns() > 0, );
-   Containers::SharedVector< IndexType, DeviceType, IndexType > rowPtrs;
+   Containers::Vector< IndexType, DeviceType, IndexType > rowPtrs;
    rowPtrs.bind( this->rowPointers.getData(), this->getRows() );
    rowPtrs = rowLengths;
    this->rowPointers.setElement( this->rows, 0 );
@@ -94,17 +109,24 @@ bool CSR< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLen
     * Allocate values and column indexes
-   if( ! this->values.setSize( this->rowPointers.getElement( this->rows ) ) ||
-       ! this->columnIndexes.setSize( this->rowPointers.getElement( this->rows ) ) )
-      return false;
+   this->values.setSize( this->rowPointers.getElement( this->rows ) );
+   this->columnIndexes.setSize( this->rowPointers.getElement( this->rows ) );
    this->columnIndexes.setValue( this->columns );
-   return true;
 template< typename Real,
           typename Device,
           typename Index >
 Index CSR< Real, Device, Index >::getRowLength( const IndexType row ) const
+   return this->rowPointers.getElement( row + 1 ) - this->rowPointers.getElement( row );
+template< typename Real,
+          typename Device,
+          typename Index >
+Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
    return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
@@ -115,12 +137,10 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool CSR< Real, Device, Index >::setLike( const CSR< Real2, Device2, Index2 >& matrix )
+void CSR< Real, Device, Index >::setLike( const CSR< Real2, Device2, Index2 >& matrix )
-   if( ! Sparse< Real, Device, Index >::setLike( matrix ) ||
-       ! this->rowPointers.setLike( matrix.rowPointers ) )
-      return false;
-   return true;
+   Sparse< Real, Device, Index >::setLike( matrix );
+   this->rowPointers.setLike( matrix.rowPointers );
 template< typename Real,
@@ -271,7 +291,7 @@ bool CSR< Real, Device, Index > :: setRowFast( const IndexType row,
    for( IndexType i = 0; i < elements; i++ )
-      printf( "Setting element row: %d column: %d value: %f \n", row, columnIndexes[ i ], values[ i ] );
+      //printf( "Setting element row: %d column: %d value: %f \n", row, columnIndexes[ i ], values[ i ] );
       this->columnIndexes[ elementPointer ] = columnIndexes[ i ];
       this->values[ elementPointer ] = values[ i ];
@@ -508,6 +528,36 @@ bool CSR< Real, Device, Index >::performSORIteration( const Vector& b,
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+CSR< Real, Device, Index >&
+CSR< Real, Device, Index >::operator=( const CSR& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   this->columnIndexes = matrix.columnIndexes;
+   this->rowPointers = matrix.rowPointers;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+CSR< Real, Device, Index >&
+CSR< Real, Device, Index >::operator=( const CSR< Real2, Device2, Index2 >& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   this->columnIndexes = matrix.columnIndexes;
+   this->rowPointers = matrix.rowPointers;
+   return *this;
 template< typename Real,
           typename Device,
           typename Index >
@@ -731,6 +781,38 @@ class CSRDeviceDependentCode< Devices::Host >
+#ifdef HAVE_MIC
+class CSRDeviceDependentCode< Devices::MIC >
+   public:
+      typedef Devices::MIC Device;
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const CSR< Real, Device, Index >& matrix,      
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+          cout <<"Not Implemented YET tnlCSRMatrixDeviceDependentCode for MIC" <<endl;
+      };
+  /*       const Index rows = matrix.getRows();
+         const tnlCSRMatrix< Real, Device, Index >* matrixPtr = &matrix;
+         const InVector* inVectorPtr = &inVector;
+         OutVector* outVectorPtr = &outVector;
+#pragma omp parallel for firstprivate( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( tnlHost::isOMPEnabled() )
+         for( Index row = 0; row < rows; row ++ )
+            ( *outVectorPtr )[ row ] = matrixPtr->rowVectorProduct( row, *inVectorPtr );
+      }*/
 #ifdef HAVE_CUDA
 template< typename Real,
           typename Index,
@@ -773,7 +855,7 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix,
    Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
    InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
    OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
-   checkCudaDevice;
    dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
    const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
    const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
@@ -826,11 +908,11 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix,
                                               gridIdx );
-   checkCudaDevice;
    Devices::Cuda::freeFromDevice( kernel_this );
    Devices::Cuda::freeFromDevice( kernel_inVector );
    Devices::Cuda::freeFromDevice( kernel_outVector );
-   checkCudaDevice;
diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h
index 299f85f5d6b95f4833153a4fc403c92c9603570b..ba14092163815ca30e7f296705d00013ed84e124 100644
--- a/src/TNL/Matrices/ChunkedEllpack.h
+++ b/src/TNL/Matrices/ChunkedEllpack.h
@@ -62,8 +62,16 @@ __global__ void ChunkedEllpackVectorProductCudaKernel( const ChunkedEllpack< Rea
 template< typename Real, typename Device, typename Index >
 class ChunkedEllpack : public Sparse< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class ChunkedEllpack;
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
@@ -74,6 +82,7 @@ class ChunkedEllpack : public Sparse< Real, Device, Index >
    typedef ChunkedEllpack< Real, Devices::Cuda, Index > CudaType;
    typedef Sparse< Real, Device, Index > BaseType;
    typedef typename BaseType::MatrixRow MatrixRow;
+   typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
@@ -81,15 +90,22 @@ class ChunkedEllpack : public Sparse< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix );
+   void setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix );
    void reset();
@@ -178,7 +194,7 @@ class ChunkedEllpack : public Sparse< Real, Device, Index >
    MatrixRow getRow( const IndexType rowIndex );
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+   ConstMatrixRow getRow( const IndexType rowIndex ) const;
    template< typename Vector >
@@ -214,6 +230,14 @@ class ChunkedEllpack : public Sparse< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   ChunkedEllpack& operator=( const ChunkedEllpack& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   ChunkedEllpack& operator=( const ChunkedEllpack< Real2, Device2, Index2 >& matrix );
    bool save( File& file ) const;
    bool load( File& file );
@@ -227,8 +251,7 @@ class ChunkedEllpack : public Sparse< Real, Device, Index >
    void printStructure( std::ostream& str,
                         const String& = "" ) const;
-   protected:
    void resolveSliceSizes( const Containers::Vector< Index, Devices::Host, Index >& rowLengths );
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 6addfa7a3241f29d2eec07b2fc8b9ece99b917f7..5c5c71543c82c10cc2da55728fd7352d83b943b0 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -14,10 +14,6 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Math.h>
-#ifdef HAVE_CUDA
-#include <cuda.h>
 namespace TNL {
 namespace Matrices {   
@@ -62,25 +58,38 @@ String ChunkedEllpack< Real, Device, Index >::getTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index >
-bool ChunkedEllpack< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                                    const IndexType columns )
+String ChunkedEllpack< Real, Device, Index >::getSerializationType()
+   return getType();
+template< typename Real,
+          typename Device,
+          typename Index >
+String ChunkedEllpack< Real, Device, Index >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index >
+void ChunkedEllpack< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                           const IndexType columns )
    TNL_ASSERT( rows > 0 && columns > 0,
               std::cerr << "rows = " << rows
                    << " columns = " << columns << std::endl );
-   if( ! Sparse< Real, Device, Index >::setDimensions( rows, columns ) )
-      return false;
+   Sparse< Real, Device, Index >::setDimensions( rows, columns );
     * Allocate slice info array. Note that there cannot be
     * more slices than rows.
-   if( ! this->slices.setSize( this->rows ) ||
-       ! this->rowToChunkMapping.setSize( this-> rows ) ||
-       ! this->rowToSliceMapping.setSize( this->rows ) ||
-       ! this->rowPointers.setSize( this->rows + 1 ) )
-      return false;
-   return true;
+   this->slices.setSize( this->rows );
+   this->rowToChunkMapping.setSize( this-> rows );
+   this->rowToSliceMapping.setSize( this->rows );
+   this->rowPointers.setSize( this->rows + 1 );
 template< typename Real,
@@ -206,10 +215,11 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( const CompressedRowLengths
 template< typename Real,
           typename Device,
           typename Index >
-bool ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
-   TNL_ASSERT( this->getRows() > 0, );
-   TNL_ASSERT( this->getColumns() > 0, );
+   TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
    IndexType elementsToAllocation( 0 );
@@ -245,7 +255,7 @@ bool ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( const Compr
       elementsToAllocation = hostMatrix.values.getSize();
    this->maxRowLength = rowLengths.max();
-   return Sparse< Real, Device, Index >::allocateMatrixElements( elementsToAllocation );
+   Sparse< Real, Device, Index >::allocateMatrixElements( elementsToAllocation );
 template< typename Real,
@@ -253,9 +263,21 @@ template< typename Real,
           typename Index >
 Index ChunkedEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const
-   const IndexType& sliceIndex = rowToSliceMapping[ row ];
+   const IndexType& sliceIndex = rowToSliceMapping.getElement( row );
    TNL_ASSERT( sliceIndex < this->rows, );
    const IndexType& chunkSize = slices.getElement( sliceIndex ).chunkSize;
+   return rowPointers.getElement( row + 1 ) - rowPointers.getElement( row );
+template< typename Real,
+          typename Device,
+          typename Index >
+Index ChunkedEllpack< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+   const IndexType& sliceIndex = rowToSliceMapping[ row ];
+   TNL_ASSERT( sliceIndex < this->rows, );
+   const IndexType& chunkSize = slices[ sliceIndex ].chunkSize;
    return rowPointers[ row + 1 ] - rowPointers[ row ];
@@ -265,16 +287,14 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool ChunkedEllpack< Real, Device, Index >::setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix )
+void ChunkedEllpack< Real, Device, Index >::setLike( const ChunkedEllpack< Real2, Device2, Index2 >& matrix )
    this->chunksInSlice = matrix.chunksInSlice;
    this->desiredChunkSize = matrix.desiredChunkSize;
-   if( ! Sparse< Real, Device, Index >::setLike( matrix ) ||
-       ! this->rowToChunkMapping.setLike( matrix.rowToChunkMapping ) ||
-       ! this->rowToSliceMapping.setLike( matrix.rowToSliceMapping ) ||
-       ! this->slices.setLike( matrix.slices ) )
-      return false;
-   return true;
+   Sparse< Real, Device, Index >::setLike( matrix );
+   this->rowToChunkMapping.setLike( matrix.rowToChunkMapping );
+   this->rowToSliceMapping.setLike( matrix.rowToSliceMapping );
+   this->slices.setLike( matrix.slices );
 template< typename Real,
@@ -916,7 +936,7 @@ getRow( const IndexType rowIndex )
    const IndexType rowOffset = this->rowPointers[ rowIndex ];
    const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
-   return MatrixRow( &this->columns[ rowOffset ],
+   return MatrixRow( &this->columnIndexes[ rowOffset ],
                      &this->values[ rowOffset ],
                      1 );
@@ -926,13 +946,13 @@ template< typename Real,
           typename Device,
           typename Index >
-const typename ChunkedEllpack< Real, Device, Index >::MatrixRow
+typename ChunkedEllpack< Real, Device, Index >::ConstMatrixRow
 ChunkedEllpack< Real, Device, Index >::
 getRow( const IndexType rowIndex ) const
    const IndexType rowOffset = this->rowPointers[ rowIndex ];
    const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
-   return MatrixRow( &this->columns[ rowOffset ],
+   return MatrixRow( &this->columnIndexes[ rowOffset ],
                      &this->values[ rowOffset ],
                      1 );
@@ -1171,6 +1191,46 @@ bool ChunkedEllpack< Real, Device, Index >::performSORIteration( const Vector& b
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+ChunkedEllpack< Real, Device, Index >&
+ChunkedEllpack< Real, Device, Index >::operator=( const ChunkedEllpack& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   this->columnIndexes = matrix.columnIndexes;
+   this->chunksInSlice = matrix.chunksInSlice;
+   this->desiredChunkSize = matrix.desiredChunkSize;
+   this->rowToChunkMapping = matrix.rowToChunkMapping;
+   this->rowToSliceMapping = matrix.rowToSliceMapping;
+   this->rowPointers = matrix.rowPointers;
+   this->slices = matrix.slices;
+   this->numberOfSlices = matrix.numberOfSlices;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+ChunkedEllpack< Real, Device, Index >&
+ChunkedEllpack< Real, Device, Index >::operator=( const ChunkedEllpack< Real2, Device2, Index2 >& matrix )
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+   this->setLike( matrix );
+   std::cerr << "Cross-device assignment for the ChunkedEllpack format is not implemented yet." << std::endl;
+   throw 1;
 template< typename Real,
           typename Device,
           typename Index >
@@ -1395,7 +1455,7 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda >
             Devices::Cuda::freeFromDevice( kernel_this );
             Devices::Cuda::freeFromDevice( kernel_inVector );
             Devices::Cuda::freeFromDevice( kernel_outVector );
-            checkCudaDevice;
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 10145c1b9240238feff4a826832021dabc204b18..3904f5c059b210ae9b61aa0f1455d4a2ca762964 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -26,8 +26,16 @@ template< typename Real = double,
           typename Index = int >
 class Dense : public Matrix< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Dense;
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
@@ -45,16 +53,20 @@ class Dense : public Matrix< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const Dense< Real2, Device2, Index2 >& matrix );
+   void setLike( const Dense< Real2, Device2, Index2 >& matrix );
     * This method is only for the compatibility with the sparse matrices.
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
     * Returns maximal number of the nonzero matrix elements that can be stored
@@ -62,6 +74,9 @@ class Dense : public Matrix< Real, Device, Index >
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    IndexType getMaxRowLength() const;
    IndexType getNumberOfMatrixElements() const;
@@ -152,29 +167,15 @@ class Dense : public Matrix< Real, Device, Index >
                    const RealType& matrixMultiplicator = 1.0,
                    const RealType& thisMatrixMultiplicator = 1.0 );
-#ifdef HAVE_NOT_CXX11
-   template< typename Matrix1, typename Matrix2, int tileDim >
-   void getMatrixProduct( const Matrix1& matrix1,
-                       const Matrix2& matrix2,
-                       const RealType& matrix1Multiplicator = 1.0,
-                       const RealType& matrix2Multiplicator = 1.0 );
    template< typename Matrix1, typename Matrix2, int tileDim = 32 >
    void getMatrixProduct( const Matrix1& matrix1,
                        const Matrix2& matrix2,
                        const RealType& matrix1Multiplicator = 1.0,
                        const RealType& matrix2Multiplicator = 1.0 );
-#ifdef HAVE_NOT_CXX11
-   template< typename Matrix, int tileDim >
-   void getTransposition( const Matrix& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
    template< typename Matrix, int tileDim = 32 >
    void getTransposition( const Matrix& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
    template< typename Vector >
    void performSORIteration( const Vector& b,
@@ -182,6 +183,14 @@ class Dense : public Matrix< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   Dense& operator=( const Dense& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
    bool save( const String& fileName ) const;
    bool load( const String& fileName );
@@ -192,7 +201,7 @@ class Dense : public Matrix< Real, Device, Index >
    void print( std::ostream& str ) const;
-   protected:
    IndexType getElementIndex( const IndexType row,
@@ -200,11 +209,9 @@ class Dense : public Matrix< Real, Device, Index >
    typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode;
    friend class DenseDeviceDependentCode< DeviceType >;
 } // namespace Matrices
 } // namespace TNL
 #include <TNL/Matrices/Dense_impl.h>
diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h
index aed56985ee61cf0b7c4b342e6f5079fd13c50bd6..bb146105eb4d903dd51b105bc5db49da74c4de44 100644
--- a/src/TNL/Matrices/Dense_impl.h
+++ b/src/TNL/Matrices/Dense_impl.h
@@ -49,14 +49,28 @@ String Dense< Real, Device, Index >::getTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index >
-bool Dense< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                           const IndexType columns )
+String Dense< Real, Device, Index >::getSerializationType()
-   if( ! Matrix< Real, Device, Index >::setDimensions( rows, columns ) ||
-       ! this->values.setSize( rows * columns ) )
-     return false;
+   return getType();
+template< typename Real,
+          typename Device,
+          typename Index >
+String Dense< Real, Device, Index >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index >
+void Dense< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                  const IndexType columns )
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   this->values.setSize( rows * columns );
    this->values.setValue( 0.0 );
-   return true;
 template< typename Real,
@@ -65,17 +79,16 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix )
+void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix )
-   return this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
 template< typename Real,
           typename Device,
           typename Index >
-bool Dense< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void Dense< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
-   return true;
 template< typename Real,
@@ -86,6 +99,15 @@ Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const
    return this->getColumns();
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+   return this->getColumns();
 template< typename Real,
           typename Device,
           typename Index >
@@ -818,7 +840,7 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix,
                                                          gridIdx_y );
-            checkCudaDevice;
       Devices::Cuda::freeFromDevice( this_device );
       Devices::Cuda::freeFromDevice( matrix_device );
@@ -846,6 +868,39 @@ void Dense< Real, Device, Index >::performSORIteration( const Vector& b,
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+Dense< Real, Device, Index >&
+Dense< Real, Device, Index >::operator=( const Dense& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+Dense< Real, Device, Index >&
+Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+   this->setLike( matrix );
+   std::cerr << "Cross-device assignment for the Dense format is not implemented yet." << std::endl;
+   throw 1;
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h
index 0082d414dcc545875c1d8473526ec5fbfbd5cd0b..b284f02adb5bd20d7311fc0d4016e20eda0509de 100644
--- a/src/TNL/Matrices/Ellpack.h
+++ b/src/TNL/Matrices/Ellpack.h
@@ -22,8 +22,16 @@ class EllpackDeviceDependentCode;
 template< typename Real, typename Device = Devices::Host, typename Index = int >
 class Ellpack : public Sparse< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Ellpack;
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
@@ -43,17 +51,24 @@ class Ellpack : public Sparse< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
-   bool setConstantCompressedRowLengths( const IndexType& rowLengths );
+   void setConstantCompressedRowLengths( const IndexType& rowLengths );
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const Ellpack< Real2, Device2, Index2 >& matrix );
+   void setLike( const Ellpack< Real2, Device2, Index2 >& matrix );
    void reset();
@@ -63,10 +78,6 @@ class Ellpack : public Sparse< Real, Device, Index >
    template< typename Real2, typename Device2, typename Index2 >
    bool operator != ( const Ellpack< Real2, Device2, Index2 >& matrix ) const;
-   /*template< typename Matrix >
-   bool copyFrom( const Matrix& matrix,
-                  const CompressedRowLengthsVector& rowLengths );*/
    bool setElementFast( const IndexType row,
                         const IndexType column,
@@ -156,6 +167,14 @@ class Ellpack : public Sparse< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   Ellpack& operator=( const Ellpack& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Ellpack& operator=( const Ellpack< Real2, Device2, Index2 >& matrix );
    bool save( File& file ) const;
    bool load( File& file );
@@ -166,9 +185,9 @@ class Ellpack : public Sparse< Real, Device, Index >
    void print( std::ostream& str ) const;
-   protected:
-   bool allocateElements();
+   void allocateElements();
    IndexType rowLengths, alignedRows;
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index 6a98fd86b280a6a30c94fc6bf2fa641f064cb0b8..4055515b35e3687a3a01688a95b0858da4b1cbc7 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -50,8 +50,24 @@ String Ellpack< Real, Device, Index >::getTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index >
-bool Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                             const IndexType columns )
+String Ellpack< Real, Device, Index >::getSerializationType()
+   return getType();
+template< typename Real,
+          typename Device,
+          typename Index >
+String Ellpack< Real, Device, Index >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index >
+void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                    const IndexType columns )
    TNL_ASSERT( rows > 0 && columns > 0,
               std::cerr << "rows = " << rows
@@ -62,33 +78,32 @@ bool Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
       this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
    else this->alignedRows = rows;
    if( this->rowLengths != 0 )
-      return allocateElements();
-   return true;
+      allocateElements();
 template< typename Real,
           typename Device,
           typename Index >
-bool Ellpack< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void Ellpack< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
-   TNL_ASSERT( this->getRows() > 0, );
-   TNL_ASSERT( this->getColumns() > 0, );
-   TNL_ASSERT( rowLengths.getSize() > 0, );
+   TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
    this->rowLengths = this->maxRowLength = rowLengths.max();
-   return allocateElements();
+   allocateElements();
 template< typename Real,
           typename Device,
           typename Index >
-bool Ellpack< Real, Device, Index >::setConstantCompressedRowLengths( const IndexType& rowLengths )
+void Ellpack< Real, Device, Index >::setConstantCompressedRowLengths( const IndexType& rowLengths )
    TNL_ASSERT( rowLengths > 0,
               std::cerr << " rowLengths = " << rowLengths );
    this->rowLengths = rowLengths;
    if( this->rows > 0 )
-      return allocateElements();
-   return true;
+      allocateElements();
 template< typename Real,
@@ -99,19 +114,26 @@ Index Ellpack< Real, Device, Index >::getRowLength( const IndexType row ) const
    return this->rowLengths;
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Ellpack< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+   return this->rowLengths;
 template< typename Real,
           typename Device,
           typename Index >
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool Ellpack< Real, Device, Index >::setLike( const Ellpack< Real2, Device2, Index2 >& matrix )
+void Ellpack< Real, Device, Index >::setLike( const Ellpack< Real2, Device2, Index2 >& matrix )
-   if( ! Sparse< Real, Device, Index >::setLike( matrix ) )
-      return false;
+   Sparse< Real, Device, Index >::setLike( matrix );
    this->rowLengths = matrix.rowLengths;
    this->alignedRows = matrix.alignedRows;
-   return true;
 template< typename Real,
@@ -153,16 +175,6 @@ bool Ellpack< Real, Device, Index >::operator != ( const Ellpack< Real2, Device2
    return ! ( ( *this ) == matrix );
-/*template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename Matrix >
-bool Ellpack< Real, Device, Index >::copyFrom( const Matrix& matrix,
-                                                        const CompressedRowLengthsVector& rowLengths )
-   return Matrix< RealType, DeviceType, IndexType >::copyFrom( matrix, rowLengths );
 template< typename Real,
           typename Device,
           typename Index >
@@ -564,17 +576,95 @@ bool Ellpack< Real, Device, Index > :: performSORIteration( const Vector& b,
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+Ellpack< Real, Device, Index >&
+Ellpack< Real, Device, Index >::operator=( const Ellpack& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   this->columnIndexes = matrix.columnIndexes;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+Ellpack< Real, Device, Index >&
+Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2 >& matrix )
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+   // setLike does not work here due to different alignment on Cuda and Host
+   this->rowLengths = matrix.rowLengths;
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   const int blockSize = 32;
+   const int blocks = roundUpDivision( this->getRows(), blockSize );
+   // host -> cuda
+   if( std::is_same< Device, Devices::Cuda >::value ) {
+      typename ValuesVector::HostType tmpValues;
+      typename ColumnIndexesVector::HostType tmpColumnIndexes;
+      tmpValues.setLike( this->values );
+      tmpColumnIndexes.setLike( this->columnIndexes );
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+      for( Index b = 0; b < blocks; b++ ) {
+         const Index offset = b * blockSize;
+         for( Index j = 0; j < rowLengths; j++ )
+            for( Index i = 0; i < blockSize && offset + i < this->getRows(); i++ ) {
+               tmpValues[ offset + j * alignedRows + i ] = matrix.values[ ( offset + i ) * rowLengths + j ];
+               tmpColumnIndexes[ offset + j * alignedRows + i ] = matrix.columnIndexes[ ( offset + i ) * rowLengths + j ];
+            }
+      }
+      this->values = tmpValues;
+      this->columnIndexes = tmpColumnIndexes;
+   }
+   // cuda -> host
+   if( std::is_same< Device, Devices::Host >::value ) {
+      ValuesVector tmpValues;
+      ColumnIndexesVector tmpColumnIndexes;
+      tmpValues.setLike( matrix.values );
+      tmpColumnIndexes.setLike( matrix.columnIndexes );
+      tmpValues = matrix.values;
+      tmpColumnIndexes = matrix.columnIndexes;
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+      for( Index b = 0; b < blocks; b++ ) {
+         const Index offset = b * rowLengths;
+         for( Index i = 0; i < blockSize && b * blockSize + i < this->getRows(); i++ )
+            for( Index j = 0; j < rowLengths; j++ ) {
+               this->values[ offset + i * rowLengths + j ] = tmpValues[ b * blockSize + j * matrix.alignedRows + i ];
+               this->columnIndexes[ offset + i * rowLengths + j ] = tmpColumnIndexes[ b * blockSize + j * matrix.alignedRows + i ];
+            }
+      }
+   }
+   return *this;
 template< typename Real,
           typename Device,
           typename Index >
 bool Ellpack< Real, Device, Index >::save( File& file ) const
    if( ! Sparse< Real, Device, Index >::save( file) ) return false;
-#ifdef HAVE_NOT_CXX11
-   if( ! file.write< IndexType, Devices::Host, IndexType >( &this->rowLengths, 1 ) ) return false;
    if( ! file.write( &this->rowLengths ) ) return false;
    return true;
@@ -584,11 +674,7 @@ template< typename Real,
 bool Ellpack< Real, Device, Index >::load( File& file )
    if( ! Sparse< Real, Device, Index >::load( file) ) return false;
-#ifdef HAVE_NOT_CXX11
-   if( ! file.read< IndexType, Devices::Host, IndexType >( &this->rowLengths, 1 ) ) return false;
    if( ! file.read( &this->rowLengths ) ) return false;
    return true;
@@ -634,11 +720,9 @@ void Ellpack< Real, Device, Index >::print( std::ostream& str ) const
 template< typename Real,
           typename Device,
           typename Index >
-bool Ellpack< Real, Device, Index >::allocateElements()
+void Ellpack< Real, Device, Index >::allocateElements()
-   if( ! Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths ) )
-      return false;
-   return true;
+   Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths );
@@ -803,12 +887,12 @@ class EllpackDeviceDependentCode< Devices::Cuda >
                   gridIdx );
-               checkCudaDevice;
+               TNL_CHECK_CUDA_DEVICE;
             //Devices::Cuda::freeFromDevice( kernel_this );
             //Devices::Cuda::freeFromDevice( kernel_inVector );
             //Devices::Cuda::freeFromDevice( kernel_outVector );
-            checkCudaDevice;
diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index f8117165c4355068dc8cc083370f34165f74e4c2..2af4d0a890c9acf52e98019f23d0aeeb282d77cb 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -22,8 +22,7 @@ template< typename Real = double,
           typename Index = int >
 class Matrix : public virtual Object
-   public:
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
@@ -32,17 +31,17 @@ class Matrix : public virtual Object
-   virtual bool setDimensions( const IndexType rows,
+   virtual void setDimensions( const IndexType rows,
                                const IndexType columns );
-   virtual bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) = 0;
+   virtual void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) = 0;
    virtual IndexType getRowLength( const IndexType row ) const = 0;
    virtual void getCompressedRowLengths( Containers::Vector< IndexType, DeviceType, IndexType >& rowLengths ) const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const Matrix< Real2, Device2, Index2 >& matrix );
+   void setLike( const Matrix< Real2, Device2, Index2 >& matrix );
    virtual IndexType getNumberOfMatrixElements() const = 0;
@@ -85,30 +84,22 @@ class Matrix : public virtual Object
    virtual Real getElement( const IndexType row,
                             const IndexType column ) const = 0;
-   Matrix< RealType, DeviceType, IndexType >& operator = ( const Matrix< RealType, DeviceType, IndexType >& );
    template< typename Matrix >
    bool operator == ( const Matrix& matrix ) const;
    template< typename Matrix >
    bool operator != ( const Matrix& matrix ) const;
-   template< typename Matrix >
-   bool copyFrom( const Matrix& matrix,
-                  const CompressedRowLengthsVector& rowLengths );
    virtual bool save( File& file ) const;
    virtual bool load( File& file );
    virtual void print( std::ostream& str ) const;
-   protected:
    IndexType rows, columns;
-   public: // TODO: remove this
    ValuesVector values;
@@ -123,8 +114,8 @@ template< typename Matrix,
           typename InVector,
           typename OutVector >
 void MatrixVectorProductCuda( const Matrix& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector );
+                              const InVector& inVector,
+                              OutVector& outVector );
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h
index 31e72fb24288a2fa799a536fef52b3a3079090a6..2c0b8d184841545e3429a1a022a315eb047eb808 100644
--- a/src/TNL/Matrices/MatrixOperations.h
+++ b/src/TNL/Matrices/MatrixOperations.h
@@ -17,6 +17,7 @@
  * The algorithms should be incorporated into the Matrices::Dense class.
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 #include <TNL/Math.h>
@@ -36,6 +37,8 @@ public:
     *    lda >= m is the leading dimension of two-dimensional array used to store matrix A,
     *    x is a vector of n elements,
     *    y is a vector of m elements.
+    *
+    * It is assumed that n is much smaller than m.
    template< typename RealType,
              typename IndexType >
@@ -49,31 +52,179 @@ public:
          const RealType& beta,
          RealType* y )
-      TNL_ASSERT( m <= lda, );
+      TNL_ASSERT_GT( m, 0, "m must be positive" );
+      TNL_ASSERT_GT( n, 0, "n must be positive" );
+      TNL_ASSERT_GE( lda, m, "lda must be at least m" );
-      if( beta != 0.0 ) {
-#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
-         for( IndexType j = 0; j < m; j++ ) {
-            RealType tmp = 0.0;
-            for( int k = 0; k < n; k++ )
-               tmp += A[ j + k * lda ] * x[ k ];
-            y[ j ] = alpha * tmp + beta * y[ j ];
+      RealType alphax[ n ];
+      for( IndexType k = 0; k < n; k++ )
+         alphax[ k ] = alpha * x[ k ];
+      if( n == 1 ) {
+         if( beta != 0.0 ) {
+            #ifdef HAVE_OPENMP
+            #pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
+            #endif
+            for( IndexType j = 0; j < m; j++ )
+               y[ j ] = A[ j ] * alphax[ 0 ] + beta * y[ j ];
+         }
+         else {
+            // the vector y might be uninitialized, and 0.0 * NaN = NaN
+            #ifdef HAVE_OPENMP
+            #pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
+            #endif
+            for( IndexType j = 0; j < m; j++ )
+               y[ j ] = A[ j ] * alphax[ 0 ];
       else {
-         // the vector y might be uninitialized, and 0.0 * NaN = NaN
-#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
-         for( IndexType j = 0; j < m; j++ ) {
-            RealType tmp = 0.0;
-            for( int k = 0; k < n; k++ )
-               tmp += A[ j + k * lda ] * x[ k ];
-            y[ j ] = alpha * tmp;
+         // the matrix A should be accessed column-wise so we split the work into small
+         // blocks and each block process by columns, either parallelly or serially
+         constexpr int block_size = 128;
+         const int blocks = m / block_size;
+         #ifdef HAVE_OPENMP
+         #pragma omp parallel if( TNL::Devices::Host::isOMPEnabled() && blocks >= 2 )
+         #endif
+         {
+            RealType aux[ block_size ];
+            #ifdef HAVE_OPENMP
+            #pragma omp for nowait
+            #endif
+            for( int b = 0; b < blocks; b++ ) {
+               const int block_offset = b * block_size;
+               // initialize array for thread-local results
+               for( int j = 0; j < block_size; j++ )
+                  aux[ j ] = 0.0;
+               // compute aux = A * alphax
+               for( int k = 0; k < n; k++ ) {
+                  const int offset = block_offset + k * lda;
+                  for( int j = 0; j < block_size; j++ )
+                     aux[ j ] += A[ offset + j ] * alphax[ k ];
+               }
+               // write result: y = aux + beta * y
+               if( beta != 0.0 ) {
+                  for( int j = 0; j < block_size; j++ )
+                     y[ block_offset + j ] = aux[ j ] + beta * y[ block_offset + j ];
+               }
+               else {
+                  // the vector y might be uninitialized, and 0.0 * NaN = NaN
+                  for( IndexType j = 0; j < block_size; j++ )
+                     y[ block_offset + j ] = aux[ j ];
+               }
+            }
+            // the first thread that reaches here processes the last, incomplete block
+            #ifdef HAVE_OPENMP
+            #pragma omp single nowait
+            #endif
+            {
+               // TODO: unlike the complete blocks, the tail is traversed row-wise
+               if( beta != 0.0 ) {
+                  for( IndexType j = blocks * block_size; j < m; j++ ) {
+                     RealType tmp = 0.0;
+                     for( int k = 0; k < n; k++ )
+                        tmp += A[ j + k * lda ] * alphax[ k ];
+                     y[ j ] = tmp + beta * y[ j ];
+                  }
+               }
+               else {
+                  // the vector y might be uninitialized, and 0.0 * NaN = NaN
+                  for( IndexType j = blocks * block_size; j < m; j++ ) {
+                     RealType tmp = 0.0;
+                     for( int k = 0; k < n; k++ )
+                        tmp += A[ j + k * lda ] * alphax[ k ];
+                     y[ j ] = tmp;
+                  }
+               }
+            }
+         }
+      }
+   }
+   /*
+    * This function performs the matrix-matrix addition
+    *    C = alpha * A + beta * B
+    * where:
+    *    alpha and beta are scalars,
+    *    A, B, C are (m by n) matrices stored in column-major format on Devices::Cuda,
+    *    lda, ldb, ldc (all >= m) are the leading dimensions of matrices A, B, C,
+    *    respectively.
+    *
+    * It is assumed that n is much smaller than m.
+    */
+   template< typename RealType,
+             typename IndexType >
+   static void
+   geam( const IndexType& m,
+         const IndexType& n,
+         const RealType& alpha,
+         const RealType* A,
+         const IndexType& lda,
+         const RealType& beta,
+         const RealType* B,
+         const IndexType& ldb,
+         RealType* C,
+         const IndexType& ldc )
+   {
+      TNL_ASSERT_GT( m, 0, "m must be positive" );
+      TNL_ASSERT_GT( n, 0, "n must be positive" );
+      TNL_ASSERT_GE( lda, m, "lda must be at least m" );
+      TNL_ASSERT_GE( ldb, m, "lda must be at least m" );
+      TNL_ASSERT_GE( ldc, m, "lda must be at least m" );
+      if( n == 1 ) {
+         #ifdef HAVE_OPENMP
+         #pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )
+         #endif
+         for( IndexType j = 0; j < m; j++ )
+            C[ j ] = alpha * A[ j ] + beta * B[ j ];
+      }
+      else {
+         // all matrices should be accessed column-wise so we split the work into small
+         // blocks and each block process by columns, either parallelly or serially
+         constexpr int block_size = 128;
+         const int blocks = m / block_size;
+         #ifdef HAVE_OPENMP
+         #pragma omp parallel if( TNL::Devices::Host::isOMPEnabled() && blocks >= 2 )
+         #endif
+         {
+            #ifdef HAVE_OPENMP
+            #pragma omp for nowait
+            #endif
+            for( int b = 0; b < blocks; b++ ) {
+               const int block_offset = b * block_size;
+               for( IndexType j = 0; j < n; j++ ) {
+                  const IndexType offset_A = j * lda + block_offset;
+                  const IndexType offset_B = j * ldb + block_offset;
+                  const IndexType offset_C = j * ldc + block_offset;
+                  for( int i = 0; i < block_size; i++ )
+                     C[ offset_C + i ] = alpha * A[ offset_A + i ] + beta * B[ offset_B + i ];
+               }
+            }
+            // the first thread that reaches here processes the last, incomplete block
+            #ifdef HAVE_OPENMP
+            #pragma omp single nowait
+            #endif
+            {
+               for( IndexType j = 0; j < n; j++ ) {
+                  const IndexType offset_A = j * lda;
+                  const IndexType offset_B = j * ldb;
+                  const IndexType offset_C = j * ldc;
+                  for( IndexType i = blocks * block_size; i < m; i++ )
+                     C[ offset_C + i ] = alpha * A[ offset_A + i ] + beta * B[ offset_B + i ];
+               }
+            }
@@ -81,12 +232,6 @@ public:
 // CUDA kernels
 #ifdef HAVE_CUDA
-#if (__CUDA_ARCH__ >= 300 )
-   static constexpr int Gemv_minBlocksPerMultiprocessor = 8;
-   static constexpr int Gemv_minBlocksPerMultiprocessor = 4;
 template< typename RealType,
           typename IndexType >
 __global__ void
@@ -105,7 +250,7 @@ GemvCudaKernel( const IndexType m,
    RealType* shx = Devices::Cuda::getSharedMemory< RealType >();
    if( threadIdx.x < n )
-      shx[ threadIdx.x ] = x[ threadIdx.x ];
+      shx[ threadIdx.x ] = alpha * x[ threadIdx.x ];
    if( beta != 0.0 ) {
@@ -113,7 +258,7 @@ GemvCudaKernel( const IndexType m,
          RealType tmp = 0.0;
          for( IndexType k = 0; k < n; k++ )
             tmp += A[ elementIdx + k * lda ] * shx[ k ];
-         y[ elementIdx ] = alpha * tmp + beta * y[ elementIdx ];
+         y[ elementIdx ] = tmp + beta * y[ elementIdx ];
          elementIdx += gridSize;
@@ -123,11 +268,39 @@ GemvCudaKernel( const IndexType m,
          RealType tmp = 0.0;
          for( IndexType k = 0; k < n; k++ )
             tmp += A[ elementIdx + k * lda ] * shx[ k ];
-         y[ elementIdx ] = alpha * tmp;
+         y[ elementIdx ] = tmp;
          elementIdx += gridSize;
+template< typename RealType,
+          typename IndexType >
+__global__ void
+GeamCudaKernel( const IndexType m,
+                const IndexType n,
+                const RealType alpha,
+                const RealType* A,
+                const IndexType lda,
+                const RealType beta,
+                const RealType* B,
+                const IndexType ldb,
+                RealType* C,
+                const IndexType ldc )
+   IndexType x = blockIdx.x * blockDim.x + threadIdx.x;
+   const IndexType gridSizeX = blockDim.x * gridDim.x;
+   const IndexType y = blockIdx.y * blockDim.y + threadIdx.y;
+   const IndexType offset_A = y * lda;
+   const IndexType offset_B = y * ldb;
+   const IndexType offset_C = y * ldc;
+   if( y < n )
+      while( x < m ) {
+         C[ x + offset_C ] = alpha * A[ x + offset_A ] + beta * B[ x + offset_B ];
+         x += gridSizeX;
+      }
 // specialization for CUDA
@@ -144,6 +317,8 @@ public:
     *    lda >= m is the leading dimension of two-dimensional array used to store matrix A,
     *    x is a vector of n elements, stored on Devices::Host,
     *    y is a vector of m elements, stored on Devices::Cuda.
+    *
+    * It is assumed that n is much smaller than m.
    template< typename RealType,
              typename IndexType >
@@ -167,19 +342,76 @@ public:
       if( ! Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< RealType, RealType, IndexType >( xDevice.getData(), x, n ) )
          throw 1;
-      dim3 blockSize( 256 );
-      dim3 gridSize;
-      const IndexType desGridSize = 4 * Gemv_minBlocksPerMultiprocessor
-                                      * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+      // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors
+      const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+      dim3 blockSize, gridSize;
+      blockSize.x = 256;
       gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) );
       GemvCudaKernel<<< gridSize, blockSize, n * sizeof( RealType ) >>>(
             m, n,
             alpha, A, lda,
             xDevice.getData(), beta, y );
-      checkCudaDevice;
+      throw Exceptions::CudaSupportMissing();
+   }
+   /*
+    * This function performs the matrix-matrix addition
+    *    C = alpha * A + beta * B
+    * where:
+    *    alpha and beta are scalars,
+    *    A, B, C are (m by n) matrices stored in column-major format on Devices::Cuda,
+    *    lda, ldb, ldc (all >= m) are the leading dimensions of matrices A, B, C,
+    *    respectively.
+    *
+    * It is assumed that n is much smaller than m.
+    */
+   template< typename RealType,
+             typename IndexType >
+   static void
+   geam( const IndexType& m,
+         const IndexType& n,
+         const RealType& alpha,
+         const RealType* A,
+         const IndexType& lda,
+         const RealType& beta,
+         const RealType* B,
+         const IndexType& ldb,
+         RealType* C,
+         const IndexType& ldc )
+   {
+      TNL_ASSERT_GT( m, 0, "m must be positive" );
+      TNL_ASSERT_GT( n, 0, "n must be positive" );
+      TNL_ASSERT_GE( lda, m, "lda must be at least m" );
+      TNL_ASSERT_GE( ldb, m, "lda must be at least m" );
+      TNL_ASSERT_GE( ldc, m, "lda must be at least m" );
+#ifdef HAVE_CUDA
+      dim3 blockSize, gridSize;
+      // max 16 columns of threads
+      blockSize.y = min( n, 16 );
+      // max 256 threads per block, power of 2
+      blockSize.x = 256;
+      while( blockSize.x * blockSize.y > 256 )
+         blockSize.x /= 2;
+      // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors
+      const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+      gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) );
+      gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y );
+      GeamCudaKernel<<< gridSize, blockSize >>>(
+            m, n,
+            alpha, A, lda,
+            beta, B, ldb,
+            C, ldc );
-      CudaSupportMissingMessage;
+      throw Exceptions::CudaSupportMissing();
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index 4d0058e5aa6892430f9e84248734c684ad130e8f..f768d748cd62dd5d01923254a5e246c8171b79d1 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -15,6 +15,7 @@
 #include <TNL/String.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Timer.h>
+#include <TNL/Matrices/MatrixReader.h>
 namespace TNL {
 namespace Matrices {   
@@ -55,18 +56,13 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
       return false;
-   if( ! matrix.setDimensions( rows, columns ) ||
-       ! rowLengths.setSize( rows ) )
-   {
-      std::cerr << "Not enough memory to allocate the sparse or the full matrix for testing." << std::endl;
-      return false;
-   }
+   matrix.setDimensions( rows, columns );
+   rowLengths.setSize( rows );
    if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
       return false;
-   if( ! matrix.setCompressedRowLengths( rowLengths ) )
-      return false;
+   matrix.setCompressedRowLengths( rowLengths );
    if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose ) )
       return false;
@@ -163,7 +159,7 @@ bool MatrixReader< Matrix >::checkMtxHeader( const String& header,
                                                 bool& symmetric )
    Containers::List< String > parsedLine;
-   header.parse( parsedLine );
+   header.split( parsedLine );
    if( parsedLine.getSize() < 5 )
       return false;
    if( parsedLine[ 0 ] != "%%MatrixMarket" )
@@ -230,7 +226,7 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file,
-      line. parse( parsedLine );
+      line.split( parsedLine );
       if( parsedLine. getSize() != 3 )
          std::cerr << "Wrong number of parameters in the matrix header." << std::endl;
@@ -365,7 +361,7 @@ bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line,
                                                          RealType& value )
    Containers::List< String > parsedLine;
-   line.parse( parsedLine );
+   line.split( parsedLine );
    if( parsedLine.getSize() != 3 )
       std::cerr << "Wrong number of parameters in the matrix row at line:" << line << std::endl;
@@ -410,11 +406,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda >
       if( ! MatrixReader< HostMatrixType >::readMtxFileHostMatrix( file, hostMatrix, rowLengthsVector, verbose ) )
          return false;
-      typename Matrix::CompressedRowLengthsVector cudaCompressedRowLengthsVector;
-      cudaCompressedRowLengthsVector.setLike( rowLengthsVector );
-      cudaCompressedRowLengthsVector = rowLengthsVector;
-      if( ! matrix.copyFrom( hostMatrix, cudaCompressedRowLengthsVector ) )
-         return false;
+      matrix = hostMatrix;
       return true;
diff --git a/src/TNL/Matrices/MatrixWriter_impl.h b/src/TNL/Matrices/MatrixWriter_impl.h
index 1db1a9df9e63435e9052aa65f4d35527038278bc..40368d0dd9fc157ff90ee0766add1cb64f2acca7 100644
--- a/src/TNL/Matrices/MatrixWriter_impl.h
+++ b/src/TNL/Matrices/MatrixWriter_impl.h
@@ -26,7 +26,7 @@ bool MatrixWriter< Matrix >::writeToGnuplot( std::ostream& str,
          RealType elementValue = matrix.getElement( row, column );
          if(  elementValue != ( RealType ) 0.0 )
-            str << column << " " << row << " " << elementValue << std::endl;
+            str << column << " " << row << " " << elementValue << "\n";
       if( verbose )
         std::cout << "Drawing the row " << row << "      \r" << std::flush;
@@ -86,7 +86,7 @@ bool MatrixWriter< Matrix >::writeEpsBody( std::ostream& str,
             str << ( column - lastColumn ) * elementSize
                 << " " << -( row - lastRow ) * elementSize
-                << " translate newpath 0 0 " << elementSize << " " << elementSize << " rectstroke" << std::endl;
+                << " translate newpath 0 0 " << elementSize << " " << elementSize << " rectstroke\n";
             lastColumn = column;
             lastRow = row;
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h
index fb881d16f2eb01845750026525425345bc4167b5..45593b1552dd501e113b1327e1c33e5bb73a3d16 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix_impl.h
@@ -28,14 +28,13 @@ Matrix< Real, Device, Index >::Matrix()
 template< typename Real,
           typename Device,
           typename Index >
- bool Matrix< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                       const IndexType columns )
+void Matrix< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                   const IndexType columns )
    TNL_ASSERT( rows > 0 && columns > 0,
-            std::cerr << " rows = " << rows << " columns = " << columns );
+               std::cerr << " rows = " << rows << " columns = " << columns );
    this->rows = rows;
    this->columns = columns;
-   return true;
 template< typename Real,
@@ -54,9 +53,9 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool Matrix< Real, Device, Index >::setLike( const Matrix< Real2, Device2, Index2 >& matrix )
+void Matrix< Real, Device, Index >::setLike( const Matrix< Real2, Device2, Index2 >& matrix )
-   return setDimensions( matrix.getRows(), matrix.getColumns() );
+   setDimensions( matrix.getRows(), matrix.getColumns() );
 template< typename Real,
@@ -86,63 +85,6 @@ void Matrix< Real, Device, Index >::reset()
    this->columns = 0;
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename MatrixT >
-bool Matrix< Real, Device, Index >::copyFrom( const MatrixT& matrix,
-                                              const CompressedRowLengthsVector& rowLengths )
-   /*tnlStaticTNL_ASSERT( DeviceType::DeviceType == Devices::HostDevice, );
-   tnlStaticTNL_ASSERT( DeviceType::DeviceType == Matrix:DeviceType::DeviceType, );*/
-   this->setLike( matrix );
-   if( ! this->setCompressedRowLengths( rowLengths ) )
-      return false;
-   Containers::Vector< RealType, Devices::Host, IndexType > values;
-   Containers::Vector< IndexType, Devices::Host, IndexType > columns;
-   if( ! values.setSize( this->getColumns() ) ||
-       ! columns.setSize( this->getColumns() ) )
-      return false;
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      TNL_ASSERT( false, );
-      // TODO: fix this
-      //matrix.getRow( row, columns.getData(), values.getData() );
-      this->setRow( row, columns.getData(), values.getData(), rowLengths.getElement( row ) );
-   }
-   return true;
-template< typename Real,
-          typename Device,
-          typename Index >
-Matrix< Real, Device, Index >& Matrix< Real, Device, Index >::operator = ( const Matrix< RealType, DeviceType, IndexType >& m )
-   this->setLike( m );
-   Containers::Vector< IndexType, DeviceType, IndexType > rowLengths;
-   m.getCompressedRowLengths( rowLengths );
-   this->setCompressedRowLengths( rowLengths );
-   Containers::Vector< RealType, DeviceType, IndexType > rowValues;
-   Containers::Vector< IndexType, DeviceType, IndexType > rowColumns;
-   const IndexType maxRowLength = rowLengths.max();
-   rowValues.setSize( maxRowLength );
-   rowColumns.setSize( maxRowLength );
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      m.getRow( row,
-                rowColumns.getData(),
-                rowValues.getData() );
-      this->setRow( row,
-                    rowColumns.getData(),
-                    rowValues.getData(),
-                    m.getRowLength( row ) );
-   }
-   return *this;
 template< typename Real,
           typename Device,
           typename Index >
@@ -173,19 +115,11 @@ template< typename Real,
           typename Index >
 bool Matrix< Real, Device, Index >::save( File& file ) const
-#ifdef HAVE_NOT_CXX11
-   if( ! Object::save( file ) ||
-       ! file.write< IndexType, Devices::Host, Index >( &this->rows, 1 ) ||
-       ! file.write< IndexType, Devices::Host, Index >( &this->columns, 1 ) ||
-       ! this->values.save( file ) )
-      return false;
    if( ! Object::save( file ) ||
        ! file.write( &this->rows ) ||
        ! file.write( &this->columns ) ||
        ! this->values.save( file ) )
       return false;
    return true;
@@ -194,19 +128,11 @@ template< typename Real,
           typename Index >
 bool Matrix< Real, Device, Index >::load( File& file )
-#ifdef HAVE_NOT_CXX11
-   if( ! Object::load( file ) ||
-       ! file.read< IndexType, Devices::Host, Index >( &this->rows, 1 ) ||
-       ! file.read< IndexType, Devices::Host, Index >( &this->columns, 1 ) ||
-       ! this->values.load( file ) )
-      return false;
    if( ! Object::load( file ) ||
        ! file.read( &this->rows ) ||
        ! file.read( &this->columns ) ||
        ! this->values.load( file ) )
       return false;
    return true;
@@ -257,12 +183,12 @@ void MatrixVectorProductCuda( const Matrix& matrix,
                                        gridIdx );
-      checkCudaDevice;
    Devices::Cuda::freeFromDevice( kernel_this );
    Devices::Cuda::freeFromDevice( kernel_inVector );
    Devices::Cuda::freeFromDevice( kernel_outVector );
-   checkCudaDevice;
diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h
index 5acd602ac2a7b4c34b3a52d99a34f2297c71c527..28de74b8ac64b9fdf002aa71a4ca6a34a6ad4b26 100644
--- a/src/TNL/Matrices/Multidiagonal.h
+++ b/src/TNL/Matrices/Multidiagonal.h
@@ -23,8 +23,16 @@ class MultidiagonalDeviceDependentCode;
 template< typename Real, typename Device = Devices::Host, typename Index = int >
 class Multidiagonal : public Matrix< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Multidiagonal;
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
@@ -42,22 +50,29 @@ class Multidiagonal : public Matrix< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    IndexType getMaxRowLength() const;
    template< typename Vector >
-   bool setDiagonals( const Vector& diagonals );
+   void setDiagonals( const Vector& diagonals );
    const Containers::Vector< Index, Device, Index >& getDiagonals() const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix );
+   void setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix );
    IndexType getNumberOfMatrixElements() const;
@@ -168,6 +183,14 @@ class Multidiagonal : public Matrix< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   Multidiagonal& operator=( const Multidiagonal& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Multidiagonal& operator=( const Multidiagonal< Real2, Device2, Index2 >& matrix );
    bool save( File& file ) const;
    bool load( File& file );
@@ -178,7 +201,7 @@ class Multidiagonal : public Matrix< Real, Device, Index >
    void print( std::ostream& str ) const;
-   protected:
    bool getElementIndex( const IndexType row,
                          const IndexType column,
@@ -195,8 +218,6 @@ class Multidiagonal : public Matrix< Real, Device, Index >
    typedef MultidiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
    friend class MultidiagonalDeviceDependentCode< DeviceType >;
 } // namespace Matrices
diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h b/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h
index c126162caf8d128aa473cc1896ab75059a6d130d..26f6b2994f1f46440f170e42d32689e46807ab77 100644
--- a/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h
+++ b/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h
@@ -29,14 +29,14 @@ setupMatrix( const MeshType& mesh,
    matrix.setDimensions( dofs, dofs );
    CoordinatesType centerCell( stencilSize );
    Containers::Vector< Index, Device, Index > diagonals;
-   if( ! diagonals.setSize( 3 ) )
-      return false;
+   diagonals.setSize( 3 );
    Index centerCellIndex = mesh.getCellIndex( CoordinatesType( stencilSize ) );
    diagonals.setElement( 0, mesh.getCellIndex( CoordinatesType( stencilSize - 1 ) ) - centerCellIndex );
    diagonals.setElement( 1, 0 );
    diagonals.setElement( 2, mesh.getCellIndex( CoordinatesType( stencilSize + 1 ) ) - centerCellIndex );
    //cout << "Setting the multidiagonal matrix offsets to: " << diagonals << std::endl;
-   return matrix.setDiagonals( diagonals );
+   matrix.setDiagonals( diagonals );
+   return true;
 template< typename MeshReal,
@@ -55,8 +55,7 @@ setupMatrix( const MeshType& mesh,
    matrix.setDimensions( dofs, dofs );
    CoordinatesType centerCell( stencilSize );
    Containers::Vector< Index, Device, Index > diagonals;
-   if( ! diagonals.setSize( 5 ) )
-      return false;
+   diagonals.setSize( 5 );
    Index centerCellIndex = mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize ) );
    diagonals.setElement( 0, mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize - 1 ) ) - centerCellIndex );
    diagonals.setElement( 1, mesh.getCellIndex( CoordinatesType( stencilSize - 1, stencilSize ) ) - centerCellIndex );
@@ -64,7 +63,8 @@ setupMatrix( const MeshType& mesh,
    diagonals.setElement( 3, mesh.getCellIndex( CoordinatesType( stencilSize + 1, stencilSize ) ) - centerCellIndex );
    diagonals.setElement( 4, mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize + 1 ) ) - centerCellIndex );
    //cout << "Setting the multidiagonal matrix offsets to: " << diagonals << std::endl;
-   return matrix.setDiagonals( diagonals );
+   matrix.setDiagonals( diagonals );
+   return true;
 template< typename MeshReal,
@@ -83,8 +83,7 @@ setupMatrix( const MeshType& mesh,
    matrix.setDimensions( dofs, dofs );
    CoordinatesType centerCell( stencilSize );
    Containers::Vector< Index, Device, Index > diagonals;
-   if( ! diagonals.setSize( 7 ) )
-      return false;
+   diagonals.setSize( 7 );
    Index centerCellIndex = mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize, stencilSize ) );
    diagonals.setElement( 0, mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize, stencilSize - 1 ) ) - centerCellIndex );
    diagonals.setElement( 1, mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize - 1, stencilSize ) ) - centerCellIndex );
@@ -94,7 +93,8 @@ setupMatrix( const MeshType& mesh,
    diagonals.setElement( 5, mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize + 1, stencilSize ) ) - centerCellIndex );
    diagonals.setElement( 6, mesh.getCellIndex( CoordinatesType( stencilSize, stencilSize, stencilSize + 1 ) ) - centerCellIndex );
    //cout << "Setting the multidiagonal matrix offsets to: " << diagonals << std::endl;
-   return matrix.setDiagonals( diagonals );
+   matrix.setDiagonals( diagonals );
+   return true;
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h
index 9545a2070c66b7c27a7cf2cb2e7a2e716bc74d64..47d827b93df0904b568c234cc8bfbea602479e46 100644
--- a/src/TNL/Matrices/Multidiagonal_impl.h
+++ b/src/TNL/Matrices/Multidiagonal_impl.h
@@ -50,32 +50,44 @@ String Multidiagonal< Real, Device, Index >::getTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index >
-bool Multidiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                                   const IndexType columns )
+String Multidiagonal< Real, Device, Index >::getSerializationType()
+   return getType();
+template< typename Real,
+          typename Device,
+          typename Index >
+String Multidiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index >
+void Multidiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                          const IndexType columns )
    TNL_ASSERT( rows > 0 && columns > 0,
               std::cerr << "rows = " << rows
                    << " columns = " << columns << std::endl );
-   if( ! Matrix< Real, Device, Index >::setDimensions( rows, columns ) )
-      return false;
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
    if( this->diagonalsShift.getSize() != 0 )
-      if( ! this->values.setSize( min( this->rows, this->columns ) * this->diagonalsShift.getSize() ) )
-         return false;
+      this->values.setSize( min( this->rows, this->columns ) * this->diagonalsShift.getSize() );
       this->values.setValue( 0.0 );
-   return true;
 template< typename Real,
           typename Device,
           typename Index >
-bool Multidiagonal< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void Multidiagonal< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
     * TODO: implement some check here similar to the one in the tridiagonal matrix
-   return true;
 template< typename Real,
@@ -93,6 +105,22 @@ Index Multidiagonal< Real, Device, Index >::getRowLength( const IndexType row )
    return rowLength;
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Multidiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+   IndexType rowLength( 0 );
+   for( IndexType i = 0; i < diagonalsShift.getSize(); i++ )
+   {
+      const IndexType column = row + diagonalsShift[ i ];
+      if( column >= 0 && column < this->getColumns() )
+         rowLength++;
+   }
+   return rowLength;
 template< typename Real,
           typename Device,
           typename Index >
@@ -107,7 +135,7 @@ template< typename Real,
           typename Device,
           typename Index >
    template< typename Vector >
-bool Multidiagonal< Real, Device, Index > :: setDiagonals(  const Vector& diagonals )
+void Multidiagonal< Real, Device, Index > :: setDiagonals(  const Vector& diagonals )
    TNL_ASSERT( diagonals.getSize() > 0,
               std::cerr << "New number of diagonals = " << diagonals.getSize() << std::endl );
@@ -115,11 +143,9 @@ bool Multidiagonal< Real, Device, Index > :: setDiagonals(  const Vector& diagon
    this->diagonalsShift = diagonals;
    if( this->rows != 0 && this->columns != 0 )
-      if( ! this->values.setSize( min( this->rows, this->columns ) * this->diagonalsShift.getSize() ) )
-         return false;
+      this->values.setSize( min( this->rows, this->columns ) * this->diagonalsShift.getSize() );
       this->values.setValue( 0.0 );
-   return true;
 template< typename Real,
@@ -136,13 +162,10 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool Multidiagonal< Real, Device, Index > :: setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix )
+void Multidiagonal< Real, Device, Index > :: setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix )
-   if( ! this->setDimensions( matrix.getRows(), matrix.getColumns() ) )
-      return false;
-   if( ! setDiagonals( matrix.getDiagonals() ) )
-      return false;
-   return true;
+   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   setDiagonals( matrix.getDiagonals() );
 template< typename Real,
@@ -591,6 +614,39 @@ bool Multidiagonal< Real, Device, Index > :: performSORIteration( const Vector&
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+Multidiagonal< Real, Device, Index >&
+Multidiagonal< Real, Device, Index >::operator=( const Multidiagonal& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   this->diagonalsShift = matrix.diagonalsShift;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+Multidiagonal< Real, Device, Index >&
+Multidiagonal< Real, Device, Index >::operator=( const Multidiagonal< Real2, Device2, Index2 >& matrix )
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+   this->setLike( matrix );
+   std::cerr << "Cross-device assignment for the Multidiagonal format is not implemented yet." << std::endl;
+   throw 1;
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h
index ebae0c936e4a8b95beecc503e771ce1ed5d0b666..0557d26ebbd94ee57ab2bcd51bb029f48cbd30c7 100644
--- a/src/TNL/Matrices/SlicedEllpack.h
+++ b/src/TNL/Matrices/SlicedEllpack.h
@@ -51,17 +51,25 @@ template< typename Real,
           int SliceSize >
 class SlicedEllpack : public Sparse< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2, int SliceSize2 >
+   friend class SlicedEllpack;
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
    typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
-   typedef SlicedEllpack< Real, Device, Index > ThisType;
-   typedef SlicedEllpack< Real, Devices::Host, Index > HostType;
-   typedef SlicedEllpack< Real, Devices::Cuda, Index > CudaType;
+   typedef SlicedEllpack< Real, Device, Index, SliceSize > ThisType;
+   typedef SlicedEllpack< Real, Devices::Host, Index, SliceSize > HostType;
+   typedef SlicedEllpack< Real, Devices::Cuda, Index, SliceSize > CudaType;
    typedef Sparse< Real, Device, Index > BaseType;
    typedef typename BaseType::MatrixRow MatrixRow;
    typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
@@ -73,15 +81,22 @@ class SlicedEllpack : public Sparse< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix );
+   void setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix );
    void reset();
@@ -178,6 +193,14 @@ class SlicedEllpack : public Sparse< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   SlicedEllpack& operator=( const SlicedEllpack& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   SlicedEllpack& operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix );
    bool save( File& file ) const;
    bool load( File& file );
@@ -188,7 +211,7 @@ class SlicedEllpack : public Sparse< Real, Device, Index >
    void print( std::ostream& str ) const;
-   protected:
    Containers::Vector< Index, Device, Index > slicePointers, sliceCompressedRowLengths;
@@ -201,12 +224,10 @@ class SlicedEllpack : public Sparse< Real, Device, Index >
    // TODO: The friend declaration above does not work because of __global__ storage specifier. Therefore we declare the following method as public. Fix this, when possible.
-   public:
    __device__ void computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths,
                                                         const IndexType sliceIdx );
 } // namespace Matrices
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h
index 097f1b6772ceb4035ceba25732872de122377051..a08aaf19bc479ce4f9fa7c4b99775af0479c78b0 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/SlicedEllpack_impl.h
@@ -51,34 +51,53 @@ template< typename Real,
           typename Device,
           typename Index,
           int SliceSize >
-bool SlicedEllpack< Real, Device, Index, SliceSize >::setDimensions( const IndexType rows,
-                                                                              const IndexType columns )
+String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType()
+   return getType();
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+void SlicedEllpack< Real, Device, Index, SliceSize >::setDimensions( const IndexType rows,
+                                                                     const IndexType columns )
    TNL_ASSERT( rows > 0 && columns > 0,
               std::cerr << "rows = " << rows
                    << " columns = " << columns << std::endl );
-   return Sparse< Real, Device, Index >::setDimensions( rows, columns );
+   Sparse< Real, Device, Index >::setDimensions( rows, columns );
 template< typename Real,
           typename Device,
           typename Index,
           int SliceSize >
-bool SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
-   TNL_ASSERT( this->getRows() > 0, );
-   TNL_ASSERT( this->getColumns() > 0, );
+   TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" );
+   TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
    const IndexType slices = roundUpDivision( this->rows, SliceSize );
-   if( ! this->sliceCompressedRowLengths.setSize( slices ) ||
-       ! this->slicePointers.setSize( slices + 1 ) )
-      return false;
+   this->sliceCompressedRowLengths.setSize( slices );
+   this->slicePointers.setSize( slices + 1 );
    DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths );
    this->maxRowLength = rowLengths.max();
-   return this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
+   this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
 template< typename Real,
@@ -91,6 +110,17 @@ Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLength( const Index
    return this->sliceCompressedRowLengths.getElement( slice );
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLengthFast( const IndexType row ) const
+   const IndexType slice = row / SliceSize;
+   return this->sliceCompressedRowLengths[ slice ];
 template< typename Real,
           typename Device,
           typename Index,
@@ -98,13 +128,11 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool SlicedEllpack< Real, Device, Index, SliceSize >::setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix )
+void SlicedEllpack< Real, Device, Index, SliceSize >::setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix )
-   if( !Sparse< Real, Device, Index >::setLike( matrix ) ||
-       ! this->slicePointers.setLike( matrix.slicePointers ) ||
-       ! this->sliceCompressedRowLengths.setLike( matrix.sliceCompressedRowLengths ) )
-      return false;
-   return true;
+   Sparse< Real, Device, Index >::setLike( matrix );
+   this->slicePointers.setLike( matrix.slicePointers );
+   this->sliceCompressedRowLengths.setLike( matrix.sliceCompressedRowLengths );
 template< typename Real,
@@ -573,6 +601,95 @@ bool SlicedEllpack< Real, Device, Index, SliceSize >::performSORIteration( const
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+SlicedEllpack< Real, Device, Index, SliceSize >&
+SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   this->columnIndexes = matrix.columnIndexes;
+   this->slicePointers = matrix.slicePointers;
+   this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index,
+          int SliceSize >
+   template< typename Real2, typename Device2, typename Index2, typename >
+SlicedEllpack< Real, Device, Index, SliceSize >&
+SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix )
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value || std::is_same< Device, Devices::MIC >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value || std::is_same< Device2, Devices::MIC >::value,
+                  "unknown device" );
+   this->setLike( matrix );
+   this->slicePointers = matrix.slicePointers;
+   this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths;
+   // host -> cuda
+   if( std::is_same< Device, Devices::Cuda >::value ) {
+      typename ValuesVector::HostType tmpValues;
+      typename ColumnIndexesVector::HostType tmpColumnIndexes;
+      tmpValues.setLike( matrix.values );
+      tmpColumnIndexes.setLike( matrix.columnIndexes );
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+      for( Index sliceIdx = 0; sliceIdx < matrix.sliceCompressedRowLengths.getSize(); sliceIdx++ ) {
+         const Index rowLength = matrix.sliceCompressedRowLengths[ sliceIdx ];
+         const Index offset = matrix.slicePointers[ sliceIdx ];
+         for( Index j = 0; j < rowLength; j++ )
+            for( Index i = 0; i < SliceSize; i++ ) {
+               tmpValues[ offset + j * SliceSize + i ] = matrix.values[ offset + i * rowLength + j ];
+               tmpColumnIndexes[ offset + j * SliceSize + i ] = matrix.columnIndexes[ offset + i * rowLength + j ];
+            }
+      }
+      this->values = tmpValues;
+      this->columnIndexes = tmpColumnIndexes;
+   }
+   // cuda -> host
+   if( std::is_same< Device, Devices::Host >::value ) {
+      ValuesVector tmpValues;
+      ColumnIndexesVector tmpColumnIndexes;
+      tmpValues.setLike( matrix.values );
+      tmpColumnIndexes.setLike( matrix.columnIndexes );
+      tmpValues = matrix.values;
+      tmpColumnIndexes = matrix.columnIndexes;
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+      for( Index sliceIdx = 0; sliceIdx < sliceCompressedRowLengths.getSize(); sliceIdx++ ) {
+         const Index rowLength = sliceCompressedRowLengths[ sliceIdx ];
+         const Index offset = slicePointers[ sliceIdx ];
+         for( Index i = 0; i < SliceSize; i++ )
+            for( Index j = 0; j < rowLength; j++ ) {
+               this->values[ offset + i * rowLength + j ] = tmpValues[ offset + j * SliceSize + i ];
+               this->columnIndexes[ offset + i * rowLength + j ] = tmpColumnIndexes[ offset + j * SliceSize + i ];
+            }
+      }
+   }
+   if( std::is_same< Device, Devices::MIC >::value ) {
+      throw std::runtime_error("Not Implemented yet for MIC");
+   }
+   return *this;
 template< typename Real,
           typename Device,
           typename Index,
@@ -623,23 +740,30 @@ template< typename Real,
           int SliceSize >
 void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str ) const
-   for( IndexType row = 0; row < this->getRows(); row++ )
-   {
-      str <<"Row: " << row << " -> ";
-      const IndexType sliceIdx = row / SliceSize;
-      const IndexType rowLength = this->sliceCompressedRowLengths.getElement( sliceIdx );
-      IndexType elementPtr = this->slicePointers.getElement( sliceIdx ) +
-                             rowLength * ( row - sliceIdx * SliceSize );
-      const IndexType rowEnd( elementPtr + rowLength );
-      while( elementPtr < rowEnd &&
-             this->columnIndexes.getElement( elementPtr ) < this->columns &&
-             this->columnIndexes.getElement( elementPtr ) != this->getPaddingIndex() )
+   if( std::is_same< Device, Devices::Host >::value ) {
+      for( IndexType row = 0; row < this->getRows(); row++ )
-         const Index column = this->columnIndexes.getElement( elementPtr );
-         str << " Col:" << column << "->" << this->values.getElement( elementPtr ) << "\t";
-         elementPtr++;
+         str <<"Row: " << row << " -> ";
+         const IndexType sliceIdx = row / SliceSize;
+         const IndexType rowLength = this->sliceCompressedRowLengths.getElement( sliceIdx );
+         IndexType elementPtr = this->slicePointers.getElement( sliceIdx ) +
+                                rowLength * ( row - sliceIdx * SliceSize );
+         const IndexType rowEnd( elementPtr + rowLength );
+         while( elementPtr < rowEnd &&
+                this->columnIndexes.getElement( elementPtr ) < this->columns &&
+                this->columnIndexes.getElement( elementPtr ) != this->getPaddingIndex() )
+         {
+            const Index column = this->columnIndexes.getElement( elementPtr );
+            str << " Col:" << column << "->" << this->values.getElement( elementPtr ) << "\t";
+            elementPtr++;
+         }
+         str << std::endl;
-      str << std::endl;
+   }
+   else {
+      HostType hostMatrix;
+      hostMatrix = *this;
+      hostMatrix.print( str );
@@ -882,7 +1006,7 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda >
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_rowLengths );
-         checkCudaDevice;
          return true;
@@ -923,17 +1047,71 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda >
                   gridIdx );
-               checkCudaDevice;
+               TNL_CHECK_CUDA_DEVICE;
             //Devices::Cuda::freeFromDevice( kernel_this );
             //Devices::Cuda::freeFromDevice( kernel_inVector );
             //Devices::Cuda::freeFromDevice( kernel_outVector );
-            checkCudaDevice;
+class SlicedEllpackDeviceDependentCode< Devices::MIC >
+   public:
+      typedef Devices::MIC Device;
+      template< typename Real,
+                typename Index,
+                int SliceSize >
+      static void initRowTraverse( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
+                                   const Index row,
+                                   Index& rowBegin,
+                                   Index& rowEnd,
+                                   Index& step )
+      {
+         throw std::runtime_error("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverse");
+      }
+      template< typename Real,
+                typename Index,
+                int SliceSize >
+      __cuda_callable__
+      static void initRowTraverseFast( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
+                                       const Index row,
+                                       Index& rowBegin,
+                                       Index& rowEnd,
+                                       Index& step )
+      {
+         throw std::runtime_error("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverseFast");
+      }
+      template< typename Real,
+                typename Index,
+                int SliceSize >
+            static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
+                                                   const typename SlicedEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths )
+      {
+         throw std::runtime_error("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::computeMaximalRowLengthInSlices");
+      }
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector,
+                int SliceSize >
+      static void vectorProduct( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         throw std::runtime_error("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::vectorProduct");
+      }
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Sparse.h
index c1f6e168ed8dcf7d3be44cea43bbdc3dc4ff707f..2a694826b94e9d757079f72942f3f810ce136885 100644
--- a/src/TNL/Matrices/Sparse.h
+++ b/src/TNL/Matrices/Sparse.h
@@ -34,10 +34,10 @@ class Sparse : public Matrix< Real, Device, Index >
-   virtual bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) = 0;
+   virtual void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) = 0;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const Sparse< Real2, Device2, Index2 >& matrix );
+   void setLike( const Sparse< Real2, Device2, Index2 >& matrix );
    IndexType getNumberOfMatrixElements() const;
@@ -58,13 +58,20 @@ class Sparse : public Matrix< Real, Device, Index >
-   bool allocateMatrixElements( const IndexType& numberOfMatrixElements );
+   void allocateMatrixElements( const IndexType& numberOfMatrixElements );
    Containers::Vector< Index, Device, Index > columnIndexes;
    Index maxRowLength;
+// This cannot be a method of the Sparse class, because the implementation uses
+// methods (marked with __cuda_callable__) which are defined only on the
+// subclasses, but are not virtual methods of Sparse.
+template< typename Matrix1, typename Matrix2 >
+void copySparseMatrix( Matrix1& A, const Matrix2& B );
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index 4f415976a7306b512162272df8142c444b92e5e0..61b3b21d68e4488493431faf890afd5c2537a1fe 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -10,8 +10,11 @@
 #pragma once
+#include "Sparse.h"
+#include <TNL/DevicePointer.h>
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 template< typename Real,
           typename Device,
@@ -27,12 +30,10 @@ template< typename Real,
    template< typename Real2,
              typename Device2,
              typename Index2 >
-bool Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index2 >& matrix )
+void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index2 >& matrix )
-   if( ! Matrix< Real, Device, Index >::setLike( matrix ) ||
-       ! this->allocateMatrixElements( matrix.getNumberOfMatrixElements() ) )
-      return false;
-   return true;
+   Matrix< Real, Device, Index >::setLike( matrix );
+   this->allocateMatrixElements( matrix.getNumberOfMatrixElements() );
 template< typename Real,
@@ -112,18 +113,16 @@ bool Sparse< Real, Device, Index >::load( File& file )
 template< typename Real,
           typename Device,
           typename Index >
-bool Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& numberOfMatrixElements )
+void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& numberOfMatrixElements )
-   if( ! this->values.setSize( numberOfMatrixElements ) ||
-       ! this->columnIndexes.setSize( numberOfMatrixElements ) )
-      return false;
+   this->values.setSize( numberOfMatrixElements );
+   this->columnIndexes.setSize( numberOfMatrixElements );
     * Setting a column index to this->columns means that the
     * index is undefined.
    this->columnIndexes.setValue( this->columns );
-   return true;
 template< typename Real,
@@ -131,6 +130,147 @@ template< typename Real,
           typename Index >
 void Sparse< Real, Device, Index >::printStructure( std::ostream& str ) const
+   TNL_ASSERT_TRUE( false, "Not implemented yet." );
+#ifdef HAVE_CUDA
+template< typename Vector, typename Matrix >
+__global__ void
+SparseMatrixSetRowLengthsVectorKernel( Vector* rowLengths,
+                                       const Matrix* matrix,
+                                       typename Matrix::IndexType rows,
+                                       typename Matrix::IndexType cols )
+   using IndexType = typename Matrix::IndexType;
+   IndexType rowIdx = blockIdx.x * blockDim.x + threadIdx.x;
+   const IndexType gridSize = blockDim.x * gridDim.x;
+   while( rowIdx < rows ) {
+      const auto max_length = matrix->getRowLengthFast( rowIdx );
+      const auto row = matrix->getRow( rowIdx );
+      IndexType length = 0;
+      for( IndexType c_j = 0; c_j < max_length; c_j++ )
+         if( row.getElementColumn( c_j ) < cols )
+            length++;
+         else
+            break;
+      rowLengths[ rowIdx ] = length;
+      rowIdx += gridSize;
+   }
+template< typename Matrix1, typename Matrix2 >
+__global__ void
+SparseMatrixCopyKernel( Matrix1* A,
+                        const Matrix2* B,
+                        const typename Matrix2::IndexType* rowLengths,
+                        typename Matrix2::IndexType rows )
+   using IndexType = typename Matrix2::IndexType;
+   IndexType rowIdx = blockIdx.x * blockDim.x + threadIdx.x;
+   const IndexType gridSize = blockDim.x * gridDim.x;
+   while( rowIdx < rows ) {
+      const auto length = rowLengths[ rowIdx ];
+      const auto rowB = B->getRow( rowIdx );
+      auto rowA = A->getRow( rowIdx );
+      for( IndexType c = 0; c < length; c++ )
+         rowA.setElement( c, rowB.getElementColumn( c ), rowB.getElementValue( c ) );
+      rowIdx += gridSize;
+   }
+template< typename Matrix1, typename Matrix2 >
+copySparseMatrix( Matrix1& A, const Matrix2& B )
+   static_assert( std::is_same< typename Matrix1::RealType, typename Matrix2::RealType >::value,
+                  "The matrices must have the same RealType." );
+   static_assert( std::is_same< typename Matrix1::DeviceType, typename Matrix2::DeviceType >::value,
+                  "The matrices must be allocated on the same device." );
+   static_assert( std::is_same< typename Matrix1::IndexType, typename Matrix2::IndexType >::value,
+                  "The matrices must have the same IndexType." );
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
+   const IndexType rows = B.getRows();
+   const IndexType cols = B.getColumns();
+   A.setDimensions( rows, cols );
+   if( std::is_same< DeviceType, Devices::Host >::value ) {
+      // set row lengths
+      typename Matrix1::CompressedRowLengthsVector rowLengths;
+      rowLengths.setSize( rows );
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+      for( IndexType i = 0; i < rows; i++ ) {
+         const auto max_length = B.getRowLength( i );
+         const auto row = B.getRow( i );
+         IndexType length = 0;
+         for( IndexType c_j = 0; c_j < max_length; c_j++ )
+            if( row.getElementColumn( c_j ) < cols )
+               length++;
+            else
+               break;
+         rowLengths[ i ] = length;
+      }
+      A.setCompressedRowLengths( rowLengths );
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+      for( IndexType i = 0; i < rows; i++ ) {
+         const auto length = rowLengths[ i ];
+         const auto rowB = B.getRow( i );
+         auto rowA = A.getRow( i );
+         for( IndexType c = 0; c < length; c++ )
+            rowA.setElement( c, rowB.getElementColumn( c ), rowB.getElementValue( c ) );
+      }
+   }
+   if( std::is_same< DeviceType, Devices::Cuda >::value ) {
+#ifdef HAVE_CUDA
+      dim3 blockSize( 256 );
+      dim3 gridSize;
+      const IndexType desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+      gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( rows, blockSize.x ) );
+      typename Matrix1::CompressedRowLengthsVector rowLengths;
+      rowLengths.setSize( rows );
+      DevicePointer< Matrix1 > Apointer( A );
+      const DevicePointer< const Matrix2 > Bpointer( B );
+      // set row lengths
+      Devices::Cuda::synchronizeDevice();
+      SparseMatrixSetRowLengthsVectorKernel<<< gridSize, blockSize >>>(
+            rowLengths.getData(),
+            &Bpointer.template getData< TNL::Devices::Cuda >(),
+            rows,
+            cols );
+      Apointer->setCompressedRowLengths( rowLengths );
+      // copy rows
+      Devices::Cuda::synchronizeDevice();
+      SparseMatrixCopyKernel<<< gridSize, blockSize >>>(
+            &Apointer.template modifyData< TNL::Devices::Cuda >(),
+            &Bpointer.template getData< TNL::Devices::Cuda >(),
+            rowLengths.getData(),
+            rows );
+      throw Exceptions::CudaSupportMissing();
+   }
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index 88c9cc9164d8bff79a1cef9b304b368e6ac3c606..0769ca83f2f0355ee3ad4c726c601bd16bae8c3f 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -25,8 +25,16 @@ template< typename Real = double,
           typename Index = int >
 class Tridiagonal : public Matrix< Real, Device, Index >
-   public:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Tridiagonal;
    typedef Real RealType;
    typedef Device DeviceType;
    typedef Index IndexType;
@@ -43,17 +51,24 @@ class Tridiagonal : public Matrix< Real, Device, Index >
    String getTypeVirtual() const;
-   bool setDimensions( const IndexType rows,
+   static String getSerializationType();
+   virtual String getSerializationTypeVirtual() const;
+   void setDimensions( const IndexType rows,
                        const IndexType columns );
-   bool setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
+   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );
    IndexType getRowLength( const IndexType row ) const;
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
    IndexType getMaxRowLength() const;
    template< typename Real2, typename Device2, typename Index2 >
-   bool setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
    IndexType getNumberOfMatrixElements() const;
@@ -148,15 +163,9 @@ class Tridiagonal : public Matrix< Real, Device, Index >
                    const RealType& matrixMultiplicator = 1.0,
                    const RealType& thisMatrixMultiplicator = 1.0 );
-#ifdef HAVE_NOT_CXX11
    template< typename Real2, typename Index2 >
    void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
                           const RealType& matrixMultiplicator = 1.0 );
-   template< typename Real2, typename Index2 >
-   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
    template< typename Vector >
@@ -165,6 +174,14 @@ class Tridiagonal : public Matrix< Real, Device, Index >
                              Vector& x,
                              const RealType& omega = 1.0 ) const;
+   // copy assignment
+   Tridiagonal& operator=( const Tridiagonal& matrix );
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
    bool save( File& file ) const;
    bool load( File& file );
@@ -175,7 +192,7 @@ class Tridiagonal : public Matrix< Real, Device, Index >
    void print( std::ostream& str ) const;
-   protected:
    IndexType getElementIndex( const IndexType row,
diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h
index 9d15ba841de1a7d64d284aa4e6301d8b9e6f6e48..f3c073cd0d65285643df34d8fc2885e802c77e36 100644
--- a/src/TNL/Matrices/Tridiagonal_impl.h
+++ b/src/TNL/Matrices/Tridiagonal_impl.h
@@ -48,44 +48,65 @@ String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
 template< typename Real,
           typename Device,
           typename Index >
-bool Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                                 const IndexType columns )
+String Tridiagonal< Real, Device, Index >::getSerializationType()
-   if( ! Matrix< Real, Device, Index >::setDimensions( rows, columns ) )
-      return false;
-   if( ! values.setSize( 3*min( rows, columns ) ) )
-      return false;
+   return getType();
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+   return this->getSerializationType();
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                        const IndexType columns )
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   values.setSize( 3*min( rows, columns ) );
    this->values.setValue( 0.0 );
-   return true;
 template< typename Real,
           typename Device,
           typename Index >
-bool Tridiagonal< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
+void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
    if( rowLengths[ 0 ] > 2 )
-      return false;
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    for( Index i = 1; i < diagonalLength-1; i++ )
       if( rowLengths[ i ] > 3 )
-         return false;
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() > this->getColumns() )
       if( rowLengths[ this->getRows()-1 ] > 1 )
-         return false;
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() == this->getColumns() )
       if( rowLengths[ this->getRows()-1 ] > 2 )
-         return false;
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() < this->getColumns() )
       if( rowLengths[ this->getRows()-1 ] > 3 )
-         return false;
-   return true;
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
 template< typename Real,
           typename Device,
           typename Index >
 Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
+   return this->getRowLengthFast( row );
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    if( row == 0 )
@@ -111,9 +132,9 @@ template< typename Real,
           typename Device,
           typename Index >
    template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
+void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
-   return this->setDimensions( m.getRows(), m.getColumns() );
+   this->setDimensions( m.getRows(), m.getColumns() );
 template< typename Real,
@@ -506,7 +527,7 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re
       Devices::Cuda::freeFromDevice( kernel_this );
       Devices::Cuda::freeFromDevice( kernel_inMatrix );
-      checkCudaDevice;
@@ -529,6 +550,39 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector& b,
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+Tridiagonal< Real, Device, Index >&
+Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+Tridiagonal< Real, Device, Index >&
+Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+   this->setLike( matrix );
+   std::cerr << "Cross-device assignment for the Tridiagonal format is not implemented yet." << std::endl;
+   throw 1;
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Meshes/CMakeLists.txt b/src/TNL/Meshes/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Meshes/DummyMesh.h b/src/TNL/Meshes/DummyMesh.h
index efbf1be40615463610df3a4ceec361b634884582..ce4cd0bf42f92d8911fa482ca240e5e919ba00e1 100644
--- a/src/TNL/Meshes/DummyMesh.h
+++ b/src/TNL/Meshes/DummyMesh.h
@@ -25,10 +25,7 @@ class DummyMesh
    typedef Index IndexType;
    typedef DummyMesh< Real, Device, Index > ThisType;
-   static const int meshDimension = 1;
-   constexpr static int getMeshDimension() { return meshDimension; }
+   constexpr static int getMeshDimension() { return 1; }
    const Real& getParametricStep(){ return 0.0; }
diff --git a/src/TNL/Meshes/GridDetails/CMakeLists.txt b/src/TNL/Meshes/GridDetails/CMakeLists.txt
index 7e39623121c8dba1ee95332a69fada92a561bbd5..eeecd7bc50bbbac8492066be8f7bf9bf20b19e4d 100644
--- a/src/TNL/Meshes/GridDetails/CMakeLists.txt
+++ b/src/TNL/Meshes/GridDetails/CMakeLists.txt
@@ -15,11 +15,11 @@ SET( headers BoundaryGridEntityChecker.h
-             NeighbourGridEntitiesStorage.h
-             NeighbourGridEntityGetter1D_impl.h
-             NeighbourGridEntityGetter2D_impl.h
-             NeighbourGridEntityGetter3D_impl.h
-             NeighbourGridEntityGetter.h
+             NeighborGridEntitiesStorage.h
+             NeighborGridEntityGetter1D_impl.h
+             NeighborGridEntityGetter2D_impl.h
+             NeighborGridEntityGetter3D_impl.h
+             NeighborGridEntityGetter.h
diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h
index 0cec1e2deb13a08b7e4ad6613ab0d8c078b4f7c4..fdf3f932ea071f78dc605ea9108d3b0a35528cf8 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D.h
@@ -14,7 +14,7 @@
 #include <TNL/Logger.h>
 #include <TNL/Meshes/GridDetails/GridEntityTopology.h>
 #include <TNL/Meshes/GridDetails/GridEntityGetter.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 #include <TNL/Meshes/GridEntity.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 #include <TNL/Meshes/DistributedGrid.h>
@@ -31,25 +31,26 @@ class Grid< 1, Real, Device, Index > : public Object
    typedef Real RealType;
    typedef Device DeviceType;
-   typedef Index IndexType;
+   typedef Index GlobalIndexType;
    typedef Containers::StaticVector< 1, Real > PointType;
    typedef Containers::StaticVector< 1, Index > CoordinatesType;
    typedef Grid< 1, Real, Devices::Host, Index > HostType;
    typedef Grid< 1, Real, Devices::Cuda, Index > CudaType;
    typedef Grid< 1, Real, Device, Index > ThisType;
+   // TODO: deprecated and to be removed (GlobalIndexType shall be used instead)
+   typedef Index IndexType;
-   static const int meshDimension = 1;
+   static constexpr int getMeshDimension() { return 1; };
    template< int EntityDimension,
              typename Config = GridEntityCrossStencilStorage< 1 > >
-   using MeshEntity = GridEntity< ThisType, EntityDimension, Config >;
+   using EntityType = GridEntity< ThisType, EntityDimension, Config >;
-   typedef MeshEntity< meshDimension, GridEntityCrossStencilStorage< 1 > > Cell;
-   typedef MeshEntity< 0 > Face;
-   typedef MeshEntity< 0 > Vertex;
+   typedef EntityType< getMeshDimension(), GridEntityCrossStencilStorage< 1 > > Cell;
+   typedef EntityType< 0 > Face;
+   typedef EntityType< 0 > Vertex;
-   static constexpr int getMeshDimension() { return meshDimension; };
    static String getType();
@@ -79,29 +80,22 @@ class Grid< 1, Real, Device, Index > : public Object
    inline const PointType& getProportions() const;
-   template< typename EntityType >
+   template< int EntiytDimension >
    IndexType getEntitiesCount() const;
-   template< int EntiytDimensions >
+   template< typename Entity >
    IndexType getEntitiesCount() const;
-   template< typename EntityType >
+   template< typename Entity >
-   EntityType getEntity( const IndexType& entityIndex ) const;
-   template< typename EntityType >
-   __cuda_callable__
-   Index getEntityIndex( const EntityType& entity ) const;
-   template< typename EntityType >
-   __cuda_callable__
-   RealType getEntityMeasure( const EntityType& entity ) const;
+   inline Entity getEntity( const IndexType& entityIndex ) const;
+   template< typename Entity >
-   inline const RealType& getCellMeasure() const;
+   inline Index getEntityIndex( const Entity& entity ) const;
@@ -115,7 +109,10 @@ class Grid< 1, Real, Device, Index > : public Object
    const RealType& getSpaceStepsProducts() const;
-   RealType getSmallestSpaceStep() const;
+   inline const RealType& getCellMeasure() const;
+   __cuda_callable__
+   inline RealType getSmallestSpaceStep() const;
    template< typename GridFunction >
diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
index f372599a12b8f667a213f1fbe4f009befaf30e24..f4504c17e897f5dade777c54322bf959fff6fce0 100644
--- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h
@@ -14,9 +14,10 @@
 #include <iomanip>
 #include <TNL/String.h>
 #include <TNL/Assert.h>
+#include <TNL/Logger.h>
 #include <TNL/Meshes/GridDetails/GnuplotWriter.h>
 #include <TNL/Meshes/GridDetails/GridEntityGetter_impl.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter1D_impl.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h>
 #include <TNL/Meshes/GridDetails/Grid1D.h>
 #include <TNL/Meshes/GridDetails/GridEntityMeasureGetter.h>
@@ -117,7 +118,7 @@ template< typename Real,
           typename Index  >
 void Grid< 1, Real, Device, Index >::setDimensions( const Index xSize )
-   TNL_ASSERT( xSize > 0, std::cerr << "xSize = " << xSize );
+   TNL_ASSERT_GT( xSize, 0, "Grid size must be positive." );
    this->dimensions.x() = xSize;
    this->numberOfCells = xSize;
    this->numberOfVertices = xSize + 1;
@@ -173,19 +174,20 @@ const typename Grid< 1, Real, Device, Index >::PointType&
    return this->proportions;
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< int EntityDimension >
 __cuda_callable__  inline
 Grid< 1, Real, Device, Index >::
 getEntitiesCount() const
-   static_assert( EntityType::entityDimension <= 1 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( EntityDimension <= 1 &&
+                  EntityDimension >= 0, "Wrong grid entity dimensions." );
-   switch( EntityType::entityDimension )
+   switch( EntityDimension )
       case 1:
          return this->numberOfCells;
@@ -198,77 +200,43 @@ getEntitiesCount() const
 template< typename Real,
           typename Device,
           typename Index >
-   template< int EntityDimensions >
+   template< typename Entity >
 __cuda_callable__  inline
 Grid< 1, Real, Device, Index >::
 getEntitiesCount() const
-   static_assert( EntityDimensions <= 1 &&
-                  EntityDimensions >= 0, "Wrong grid entity dimensions." );
-   switch( EntityDimensions )
-   {
-      case 1:
-         return this->numberOfCells;
-      case 0:
-         return this->numberOfVertices;
-   }
-   return -1;
+   return getEntitiesCount< Entity::getEntityDimension() >();
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
  __cuda_callable__ inline
 Grid< 1, Real, Device, Index >::
 getEntity( const IndexType& entityIndex ) const
-   static_assert( EntityType::entityDimension <= 1 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( Entity::getEntityDimension() <= 1 &&
+                  Entity::getEntityDimension() >= 0, "Wrong grid entity dimensions." );
-   return GridEntityGetter< ThisType, EntityType >::getEntity( *this, entityIndex );
+   return GridEntityGetter< ThisType, Entity >::getEntity( *this, entityIndex );
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
 __cuda_callable__ inline
 Grid< 1, Real, Device, Index >::
-getEntityIndex( const EntityType& entity ) const
+getEntityIndex( const Entity& entity ) const
-   static_assert( EntityType::entityDimension <= 1 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( Entity::getEntityDimension() <= 1 &&
+                  Entity::getEntityDimension() >= 0, "Wrong grid entity dimensions." );
-   return GridEntityGetter< ThisType, EntityType >::getEntityIndex( *this, entity );
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename EntityType >
-Grid< 1, Real, Device, Index >::
-getEntityMeasure( const EntityType& entity ) const
-   return GridEntityMeasureGetter< ThisType, EntityType::getDimensions() >::getMeasure( *this, entity );
-template< typename Real,
-          typename Device,
-          typename Index >
-const Real&
-Grid< 1, Real, Device, Index >::
-getCellMeasure() const
-   return this->template getSpaceStepsProducts< 1 >();
+   return GridEntityGetter< ThisType, Entity >::getEntityIndex( *this, entity );
 template< typename Real,
@@ -303,11 +271,21 @@ const Real&
 Grid< 1, Real, Device, Index >::
 getSpaceStepsProducts() const
-   TNL_ASSERT( xPow >= -2 && xPow <= 2,
-              std::cerr << " xPow = " << xPow );
+   static_assert( xPow >= -2 && xPow <= 2, "unsupported value of xPow" );
    return this->spaceStepsProducts[ xPow + 2 ];
+template< typename Real,
+          typename Device,
+          typename Index >
+const Real&
+Grid< 1, Real, Device, Index >::
+getCellMeasure() const
+   return this->template getSpaceStepsProducts< 1 >();
 template< typename Real,
           typename Device,
           typename Index >
@@ -469,7 +447,7 @@ bool Grid< 1, Real, Device, Index >::write( const MeshFunction& function,
    const RealType hx = getSpaceSteps(). x();
    if( format == "gnuplot" )
-      typename ThisType::template MeshEntity< getMeshDimension() > entity( *this );
+      typename ThisType::template EntityType< getMeshDimension() > entity( *this );
       for( entity.getCoordinates().x() = 0;
            entity.getCoordinates().x() < getDimensions(). x();
            entity.getCoordinates().x() ++ )
@@ -496,6 +474,8 @@ writeProlog( Logger& logger )
    logger.writeParameter( "Domain proportions:", this->proportions );
    logger.writeParameter( "Domain dimensions:", this->dimensions );
    logger.writeParameter( "Space steps:", this->getSpaceSteps() );
+   logger.writeParameter( "Number of cells:", getEntitiesCount< Cell >() );
+   logger.writeParameter( "Number of vertices:", getEntitiesCount< Vertex >() );
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h
index 9e2bcdfb52b491f9879f9d1c4a07e64a73635189..41a0a27d6f2cc82ab0ecf75a0a67a0c995b869b9 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D.h
@@ -11,9 +11,10 @@
 #pragma once
 #include <TNL/Meshes/Grid.h>
+#include <TNL/Meshes/GridEntity.h>
 #include <TNL/Meshes/GridDetails/GridEntityTopology.h>
 #include <TNL/Meshes/GridDetails/GridEntityGetter.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 namespace TNL {
 namespace Meshes {
@@ -27,33 +28,34 @@ class Grid< 2, Real, Device, Index > : public Object
    typedef Real RealType;
    typedef Device DeviceType;
-   typedef Index IndexType;
+   typedef Index GlobalIndexType;
    typedef Containers::StaticVector< 2, Real > PointType;
    typedef Containers::StaticVector< 2, Index > CoordinatesType;
    typedef Grid< 2, Real, Devices::Host, Index > HostType;
    typedef Grid< 2, Real, Devices::Cuda, Index > CudaType;
    typedef Grid< 2, Real, Device, Index > ThisType;
-   static const int meshDimension = 2;
+   // TODO: deprecated and to be removed (GlobalIndexType shall be used instead)
+   typedef Index IndexType;
+   static constexpr int getMeshDimension() { return 2; };
    template< int EntityDimension,
              typename Config = GridEntityNoStencilStorage >//CrossStencilStorage< 1 > >
-   using MeshEntity = GridEntity< ThisType, EntityDimension, Config >;
+   using EntityType = GridEntity< ThisType, EntityDimension, Config >;
-   typedef MeshEntity< meshDimension, GridEntityCrossStencilStorage< 1 > > Cell;
-   typedef MeshEntity< meshDimension - 1, GridEntityNoStencilStorage > Face;
-   typedef MeshEntity< 0 > Vertex;
+   typedef EntityType< getMeshDimension(), GridEntityCrossStencilStorage< 1 > > Cell;
+   typedef EntityType< getMeshDimension() - 1, GridEntityNoStencilStorage > Face;
+   typedef EntityType< 0 > Vertex;
    // TODO: remove this
    //template< int EntityDimension, 
    //          typename Config = GridEntityNoStencilStorage >//CrossStencilStorage< 1 > >
-   //using TestMeshEntity = tnlTestGridEntity< ThisType, EntityDimension, Config >;
-   //typedef TestMeshEntity< meshDimension, GridEntityCrossStencilStorage< 1 > > TestCell;
+   //using TestEntityType = tnlTestGridEntity< ThisType, EntityDimension, Config >;
+   //typedef TestEntityType< getMeshDimension(), GridEntityCrossStencilStorage< 1 > > TestCell;
-   static constexpr int getMeshDimension() { return meshDimension; };
    static String getType();
@@ -83,29 +85,22 @@ class Grid< 2, Real, Device, Index > : public Object
    inline const PointType& getProportions() const;
-   template< int EntityDimensions >
+   template< int EntityDimension >
    IndexType getEntitiesCount() const;
-   template< typename EntityType >
-   __cuda_callable__
-   IndexType getEntitiesCount() const;
-   template< typename EntityType >
-   __cuda_callable__
-   EntityType getEntity( const IndexType& entityIndex ) const;
-   template< typename EntityType >
+   template< typename Entity >
-   Index getEntityIndex( const EntityType& entity ) const;
-   template< typename EntityType >
-   __cuda_callable__
-   RealType getEntityMeasure( const EntityType& entity ) const;
+   inline IndexType getEntitiesCount() const;
+   template< typename Entity >
-   inline const RealType& getCellMeasure() const;
+   inline Entity getEntity( const IndexType& entityIndex ) const;
+   template< typename Entity >
+   __cuda_callable__
+   inline Index getEntityIndex( const Entity& entity ) const;
    inline const PointType& getSpaceSteps() const;
@@ -117,7 +112,10 @@ class Grid< 2, Real, Device, Index > : public Object
    const RealType& getSpaceStepsProducts() const;
-   RealType getSmallestSpaceStep() const;
+   inline const RealType& getCellMeasure() const;
+   __cuda_callable__
+   inline RealType getSmallestSpaceStep() const;
    template< typename GridFunction >
diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
index bb4a3c2944d496c1438e1bab70c437f75192b4f0..1df3f13630dcee66d2eebee7bca19a6c7ca20957 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
@@ -12,10 +12,13 @@
 #include <fstream>
 #include <iomanip>
+#include <TNL/String.h>
 #include <TNL/Assert.h>
+#include <TNL/Logger.h>
 #include <TNL/Meshes/GridDetails/GnuplotWriter.h>
 #include <TNL/Meshes/GridDetails/GridEntityGetter_impl.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter2D_impl.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h>
+#include <TNL/Meshes/GridDetails/Grid2D.h>
 #include <TNL/Meshes/GridDetails/GridEntityMeasureGetter.h>
 namespace TNL {
@@ -152,8 +155,8 @@ template< typename Real,
           typename Index >
 void Grid< 2, Real, Device, Index > :: setDimensions( const Index xSize, const Index ySize )
-   TNL_ASSERT( xSize > 0, std::cerr << "xSize = " << xSize );
-   TNL_ASSERT( ySize > 0, std::cerr << "ySize = " << ySize );
+   TNL_ASSERT_GT( xSize, 0, "Grid size must be positive." );
+   TNL_ASSERT_GT( ySize, 0, "Grid size must be positive." );
    this->dimensions.x() = xSize;
    this->dimensions.y() = ySize;
@@ -222,19 +225,20 @@ const typename Grid< 2, Real, Device, Index > :: PointType&
    return this->proportions;
 template< typename Real,
           typename Device,
           typename Index >
-   template< int EntityDimensions >
+   template< int EntityDimension >
 __cuda_callable__ inline
 Grid< 2, Real, Device, Index >::
 getEntitiesCount() const
-   static_assert( EntityDimensions <= 2 &&
-                  EntityDimensions >= 0, "Wrong grid entity dimensions." );
+   static_assert( EntityDimension <= 2 &&
+                  EntityDimension >= 0, "Wrong grid entity dimensions." );
-   switch( EntityDimensions )
+   switch( EntityDimension )
       case 2:
          return this->numberOfCells;
@@ -246,85 +250,48 @@ getEntitiesCount() const
    return -1;
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
 __cuda_callable__ inline
 Grid< 2, Real, Device, Index >::
 getEntitiesCount() const
-   static_assert( EntityType::entityDimension <= 2 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
-   switch( EntityType::entityDimension )
-   {
-      case 2:
-         return this->numberOfCells;
-      case 1:
-         return this->numberOfFaces;
-      case 0:
-         return this->numberOfVertices;
-   }
-   return -1;
+   return getEntitiesCount< Entity::getEntityDimension() >();
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
 __cuda_callable__ inline
 Grid< 2, Real, Device, Index >::
 getEntity( const IndexType& entityIndex ) const
-   static_assert( EntityType::entityDimension <= 2 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( Entity::getEntityDimension() <= 2 &&
+                  Entity::getEntityDimension() >= 0, "Wrong grid entity dimensions." );
-   return GridEntityGetter< ThisType, EntityType >::getEntity( *this, entityIndex );
+   return GridEntityGetter< ThisType, Entity >::getEntity( *this, entityIndex );
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
 __cuda_callable__ inline
 Grid< 2, Real, Device, Index >::
-getEntityIndex( const EntityType& entity ) const
+getEntityIndex( const Entity& entity ) const
-   static_assert( EntityType::entityDimension <= 2 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( Entity::getEntityDimension() <= 2 &&
+                  Entity::getEntityDimension() >= 0, "Wrong grid entity dimensions." );
-   return GridEntityGetter< ThisType, EntityType >::getEntityIndex( *this, entity );
+   return GridEntityGetter< ThisType, Entity >::getEntityIndex( *this, entity );
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename EntityType >
-Grid< 2, Real, Device, Index >::
-getEntityMeasure( const EntityType& entity ) const
-   return GridEntityMeasureGetter< ThisType, EntityType::getDimensions() >::getMeasure( *this, entity );
-template< typename Real,
-          typename Device,
-          typename Index >
-const Real&
-Grid< 2, Real, Device, Index >::
-getCellMeasure() const
-   return this->template getSpaceStepsProducts< 1, 1 >();
 template< typename Real,
           typename Device,
           typename Index >
@@ -357,14 +324,22 @@ const Real&
 Grid< 2, Real, Device, Index >::
 getSpaceStepsProducts() const
-   TNL_ASSERT( xPow >= -2 && xPow <= 2,
-              std::cerr << " xPow = " << xPow );
-   TNL_ASSERT( yPow >= -2 && yPow <= 2,
-              std::cerr << " yPow = " << yPow );
+   static_assert( xPow >= -2 && xPow <= 2, "unsupported value of xPow" );
+   static_assert( yPow >= -2 && yPow <= 2, "unsupported value of yPow" );
    return this->spaceStepsProducts[ xPow + 2 ][ yPow + 2 ];
+template< typename Real,
+          typename Device,
+          typename Index >
+const Real&
+Grid< 2, Real, Device, Index >::
+getCellMeasure() const
+   return this->template getSpaceStepsProducts< 1, 1 >();
 template< typename Real,
           typename Device,
           typename Index >
@@ -531,7 +506,7 @@ bool Grid< 2, Real, Device, Index > :: writeMesh( const String& fileName,
            << this->getProportions(). x() << "cm , "
            << this->getProportions(). y() << "cm );"
            << std::endl << std::endl;
-      MeshEntity< 0 > vertex( *this );
+      Vertex vertex( *this );
       CoordinatesType& vertexCoordinates = vertex.getCoordinates();
       PointType v;
       for( Index j = 0; j < this->dimensions. y(); j ++ )
@@ -569,7 +544,7 @@ bool Grid< 2, Real, Device, Index > :: writeMesh( const String& fileName,
       file << std::endl;
-      MeshEntity< 2 > cell( *this );
+      Cell cell( *this );
       CoordinatesType& cellCoordinates = cell.getCoordinates();
       const RealType cellMeasure = this->getSpaceSteps().x() * this->getSpaceSteps().y();
       for( Index i = 0; i < this->dimensions. x(); i ++ )
@@ -692,6 +667,9 @@ writeProlog( Logger& logger )
    logger.writeParameter( "Domain proportions:", this->proportions );
    logger.writeParameter( "Domain dimensions:", this->dimensions );
    logger.writeParameter( "Space steps:", this->getSpaceSteps() );
+   logger.writeParameter( "Number of cells:", getEntitiesCount< Cell >() );
+   logger.writeParameter( "Number of faces:", getEntitiesCount< Face >() );
+   logger.writeParameter( "Number of vertices:", getEntitiesCount< Vertex >() );
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Grid3D.h b/src/TNL/Meshes/GridDetails/Grid3D.h
index 3ba3b2505e4bdf8f1c65bafd95f2be4680ce4768..2321d888fa557abaab05fd5f0deec0bb5aceb99f 100644
--- a/src/TNL/Meshes/GridDetails/Grid3D.h
+++ b/src/TNL/Meshes/GridDetails/Grid3D.h
@@ -11,9 +11,10 @@
 #pragma once
 #include <TNL/Meshes/Grid.h>
+#include <TNL/Meshes/GridEntity.h>
 #include <TNL/Meshes/GridDetails/GridEntityTopology.h>
 #include <TNL/Meshes/GridDetails/GridEntityGetter.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 namespace TNL {
 namespace Meshes {
@@ -27,25 +28,26 @@ class Grid< 3, Real, Device, Index > : public Object
    typedef Real RealType;
    typedef Device DeviceType;
-   typedef Index IndexType;
+   typedef Index GlobalIndexType;
    typedef Containers::StaticVector< 3, Real > PointType;
    typedef Containers::StaticVector< 3, Index > CoordinatesType;
    typedef Grid< 3, Real, Devices::Host, Index > HostType;
    typedef Grid< 3, Real, Devices::Cuda, Index > CudaType;
    typedef Grid< 3, Real, Device, Index > ThisType;
-   static const int meshDimension = 3;
+   // TODO: deprecated and to be removed (GlobalIndexType shall be used instead)
+   typedef Index IndexType;
+   static constexpr int getMeshDimension() { return 3; };
    template< int EntityDimension,
              typename Config = GridEntityCrossStencilStorage< 1 > >
-   using MeshEntity = GridEntity< ThisType, EntityDimension, Config >;
-   typedef MeshEntity< meshDimension, GridEntityCrossStencilStorage< 1 > > Cell;
-   typedef MeshEntity< meshDimension - 1 > Face;
-   typedef MeshEntity< 1 > Edge;
-   typedef MeshEntity< 0 > Vertex;
+   using EntityType = GridEntity< ThisType, EntityDimension, Config >;
-   static constexpr int getMeshDimension() { return meshDimension; };
+   typedef EntityType< getMeshDimension(), GridEntityCrossStencilStorage< 1 > > Cell;
+   typedef EntityType< getMeshDimension() - 1 > Face;
+   typedef EntityType< 1 > Edge;
+   typedef EntityType< 0 > Vertex;
@@ -72,29 +74,23 @@ class Grid< 3, Real, Device, Index > : public Object
    inline const PointType& getProportions() const;
-   template< typename EntityType >
+   template< int EntityDimension >
    IndexType getEntitiesCount() const;
-   template< int Dimensions >
+   template< typename Entity >
    IndexType getEntitiesCount() const;
-   template< typename EntityType >
-   __cuda_callable__
-   EntityType getEntity( const IndexType& entityIndex ) const;
-   template< typename EntityType >
-   __cuda_callable__
-   Index getEntityIndex( const EntityType& entity ) const;
-   template< typename EntityType >
+   template< typename Entity >
-   RealType getEntityMeasure( const EntityType& entity ) const;
+   inline Entity getEntity( const IndexType& entityIndex ) const;
+   template< typename Entity >
-   inline const RealType& getCellMeasure() const;
+   inline Index getEntityIndex( const Entity& entity ) const;
    inline const PointType& getSpaceSteps() const;
@@ -102,6 +98,9 @@ class Grid< 3, Real, Device, Index > : public Object
    const RealType& getSpaceStepsProducts() const;
+   __cuda_callable__
+   inline const RealType& getCellMeasure() const;
    RealType getSmallestSpaceStep() const;
@@ -155,7 +154,7 @@ class Grid< 3, Real, Device, Index > : public Object
    PointType origin, proportions;
-   IndexType cellZNeighboursStep;
+   IndexType cellZNeighborsStep;
    PointType spaceSteps;
@@ -165,7 +164,7 @@ class Grid< 3, Real, Device, Index > : public Object
    friend class GridEntityGetter;
    template< typename, int, typename >
-   friend class NeighbourGridEntityGetter;
+   friend class NeighborGridEntityGetter;
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Grid3D_impl.h
index d8e4d175fcfacf6192542f536f8f873ac0728130..b6cb533c3dc723a99163cc0b2b84dece2b9e3866 100644
--- a/src/TNL/Meshes/GridDetails/Grid3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid3D_impl.h
@@ -10,10 +10,14 @@
 #pragma once
+#include <fstream>
 #include <iomanip>
+#include <TNL/String.h>
 #include <TNL/Assert.h>
+#include <TNL/Logger.h>
+#include <TNL/Meshes/GridDetails/GnuplotWriter.h>
 #include <TNL/Meshes/GridDetails/GridEntityGetter_impl.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter3D_impl.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h>
 #include <TNL/Meshes/GridDetails/Grid3D.h>
 #include <TNL/Meshes/GridDetails/GridEntityMeasureGetter.h>
@@ -45,7 +49,7 @@ template< typename Real,
 String Grid< 3, Real, Device, Index > :: getType()
    return String( "Meshes::Grid< " ) +
-          String( meshDimension ) + ", " +
+          String( getMeshDimension() ) + ", " +
           String( TNL::getType< RealType >() ) + ", " +
           String( Device :: getDeviceType() ) + ", " +
           String( TNL::getType< IndexType >() ) + " >";
@@ -164,9 +168,9 @@ template< typename Real,
           typename Index >
 void Grid< 3, Real, Device, Index > :: setDimensions( const Index xSize, const Index ySize, const Index zSize )
-   TNL_ASSERT( xSize > 0, std::cerr << "xSize = " << xSize );
-   TNL_ASSERT( ySize > 0, std::cerr << "ySize = " << ySize );
-   TNL_ASSERT( zSize > 0, std::cerr << "zSize = " << zSize );
+   TNL_ASSERT_GT( xSize, 0, "Grid size must be positive." );
+   TNL_ASSERT_GT( ySize, 0, "Grid size must be positive." );
+   TNL_ASSERT_GT( zSize, 0, "Grid size must be positive." );
    this->dimensions.x() = xSize;
    this->dimensions.y() = ySize;
@@ -188,7 +192,7 @@ void Grid< 3, Real, Device, Index > :: setDimensions( const Index xSize, const I
    this->numberOfVertices = ( xSize + 1 ) * ( ySize + 1 ) * ( zSize + 1 );
-   this->cellZNeighboursStep = xSize * ySize;
+   this->cellZNeighborsStep = xSize * ySize;
@@ -242,19 +246,20 @@ const typename Grid< 3, Real, Device, Index > :: PointType&
 	return this->proportions;
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< int EntityDimension >
 __cuda_callable__  inline
 Grid< 3, Real, Device, Index >::
 getEntitiesCount() const
-   static_assert( EntityType::entityDimension <= 3 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( EntityDimension <= 3 &&
+                  EntityDimension >= 0, "Wrong grid entity dimensions." );
-   switch( EntityType::entityDimension )
+   switch( EntityDimension )
       case 3:
          return this->numberOfCells;
@@ -271,80 +276,43 @@ getEntitiesCount() const
 template< typename Real,
           typename Device,
           typename Index >
-   template< int EntityDimensions >
+   template< typename Entity >
 __cuda_callable__  inline
 Grid< 3, Real, Device, Index >::
 getEntitiesCount() const
-   static_assert( EntityDimensions <= 3 &&
-                  EntityDimensions >= 0, "Wrong grid entity dimensions." );
-   switch( EntityDimensions )
-   {
-      case 3:
-         return this->numberOfCells;
-      case 2:
-         return this->numberOfFaces;
-      case 1:
-         return this->numberOfEdges;
-      case 0:
-         return this->numberOfVertices;
-   }
-   return -1;
+   return getEntitiesCount< Entity::getEntityDimension() >();
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
  __cuda_callable__ inline
 Grid< 3, Real, Device, Index >::
 getEntity( const IndexType& entityIndex ) const
-   static_assert( EntityType::entityDimension <= 3 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( Entity::getEntityDimension() <= 3 &&
+                  Entity::getEntityDimension() >= 0, "Wrong grid entity dimensions." );
-   return GridEntityGetter< ThisType, EntityType >::getEntity( *this, entityIndex );
+   return GridEntityGetter< ThisType, Entity >::getEntity( *this, entityIndex );
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename EntityType >
+   template< typename Entity >
 __cuda_callable__ inline
 Grid< 3, Real, Device, Index >::
-getEntityIndex( const EntityType& entity ) const
+getEntityIndex( const Entity& entity ) const
-   static_assert( EntityType::entityDimension <= 3 &&
-                  EntityType::entityDimension >= 0, "Wrong grid entity dimension." );
+   static_assert( Entity::getEntityDimension() <= 3 &&
+                  Entity::getEntityDimension() >= 0, "Wrong grid entity dimensions." );
-   return GridEntityGetter< ThisType, EntityType >::getEntityIndex( *this, entity );
-template< typename Real,
-          typename Device,
-          typename Index >
-   template< typename EntityType >
-Grid< 3, Real, Device, Index >::
-getEntityMeasure( const EntityType& entity ) const
-   return GridEntityMeasureGetter< ThisType, EntityType::getDimensions() >::getMeasure( *this, entity );
-template< typename Real,
-          typename Device,
-          typename Index >
-const Real&
-Grid< 3, Real, Device, Index >::
-getCellMeasure() const
-   return this->template getSpaceStepsProducts< 1, 1, 1 >();
+   return GridEntityGetter< ThisType, Entity >::getEntityIndex( *this, entity );
 template< typename Real,
@@ -367,16 +335,23 @@ const Real&
 Grid< 3, Real, Device, Index >::
 getSpaceStepsProducts() const
-   TNL_ASSERT( xPow >= -2 && xPow <= 2,
-              std::cerr << " xPow = " << xPow );
-   TNL_ASSERT( yPow >= -2 && yPow <= 2,
-              std::cerr << " yPow = " << yPow );
-   TNL_ASSERT( zPow >= -2 && zPow <= 2,
-              std::cerr << " zPow = " << zPow );
+   static_assert( xPow >= -2 && xPow <= 2, "unsupported value of xPow" );
+   static_assert( yPow >= -2 && yPow <= 2, "unsupported value of yPow" );
+   static_assert( zPow >= -2 && zPow <= 2, "unsupported value of zPow" );
    return this->spaceStepsProducts[ xPow + 2 ][ yPow + 2 ][ zPow + 2 ];
+template< typename Real,
+          typename Device,
+          typename Index >
+const Real&
+Grid< 3, Real, Device, Index >::
+getCellMeasure() const
+   return this->template getSpaceStepsProducts< 1, 1, 1 >();
 template< typename Real,
           typename Device,
           typename Index >
@@ -405,7 +380,7 @@ typename GridFunction::RealType
                                                  const typename GridFunction::RealType& p ) const
    typename GridFunction::RealType lpNorm( 0.0 );
-   MeshEntity< getMeshDimension() > cell;
+   Cell cell;
    for( cell.getCoordinates().z() = 0;
         cell.getCoordinates().z() < getDimensions().z();
         cell.getCoordinates().z()++ )
@@ -433,7 +408,7 @@ template< typename Real,
                                                                            const GridFunction& f2 ) const
    typename GridFunction::RealType maxDiff( -1.0 );
-   MeshEntity< getMeshDimension() > cell( *this );
+   Cell cell( *this );
    for( cell.getCoordinates().z() = 0;
         cell.getCoordinates().z() < getDimensions().z();
         cell.getCoordinates().z()++ )
@@ -460,8 +435,7 @@ template< typename Real,
                                                                  const typename GridFunction::RealType& p ) const
    typename GridFunction::RealType lpNorm( 0.0 );
-   MeshEntity< getMeshDimension() > cell( *this );
+   Cell cell( *this );
    for( cell.getCoordinates().z() = 0;
         cell.getCoordinates().z() < getDimensions().z();
         cell.getCoordinates().z()++ )
@@ -537,7 +511,9 @@ template< typename Real,
 bool Grid< 3, Real, Device, Index >::writeMesh( const String& fileName,
                                                    const String& format ) const
-   TNL_ASSERT( false, std::cerr << "TODO: FIX THIS"); // TODO: FIX THIS
+   /*****
+    * TODO: implement this
+    */
    return true;
@@ -604,6 +580,9 @@ writeProlog( Logger& logger )
    logger.writeParameter( "Domain proportions:", this->proportions );
    logger.writeParameter( "Domain dimensions:", this->dimensions );
    logger.writeParameter( "Space steps:", this->getSpaceSteps() );
+   logger.writeParameter( "Number of cells:", getEntitiesCount< Cell >() );
+   logger.writeParameter( "Number of faces:", getEntitiesCount< Face >() );
+   logger.writeParameter( "Number of vertices:", getEntitiesCount< Vertex >() );
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/GridEntityGetter.h b/src/TNL/Meshes/GridDetails/GridEntityGetter.h
index 17f764052a8bda7d266dc209bab4bc1d43abe88a..1c092e41be541a66b30cb330a8ef6689cdcaf717 100644
--- a/src/TNL/Meshes/GridDetails/GridEntityGetter.h
+++ b/src/TNL/Meshes/GridDetails/GridEntityGetter.h
@@ -15,7 +15,7 @@ namespace Meshes {
 template< typename Grid,
           typename GridEntity,
-          int EntityDimension = GridEntity::entityDimension >
+          int EntityDimension = GridEntity::getEntityDimension() >
 class GridEntityGetter
    //static_assert( false, "Wrong mesh type or entity topology." );
diff --git a/src/TNL/Meshes/GridDetails/GridEntityGetter_impl.h b/src/TNL/Meshes/GridDetails/GridEntityGetter_impl.h
index 15ad36fac490dd33f351566bde7cbd0af1228044..49bc86956d659edff903b15927a1f4753c80fab2 100644
--- a/src/TNL/Meshes/GridDetails/GridEntityGetter_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridEntityGetter_impl.h
@@ -44,10 +44,8 @@ class GridEntityGetter<
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-              std::cerr << " index = " << index
-                   << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                   << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          return GridEntity
             ( grid,
               CoordinatesType( index ),
@@ -59,11 +57,8 @@ class GridEntityGetter<
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() < grid.getDimensions() + CoordinatesType( 1 - entityDimension ),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " grid.getDimensions() = " << grid.getDimensions()
-                   << " EntityDimension = " << entityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), grid.getDimensions() + CoordinatesType( 1 - entityDimension ), "wrong coordinates" );
          return entity.getCoordinates().x();
@@ -90,10 +85,8 @@ class GridEntityGetter< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -109,10 +102,8 @@ class GridEntityGetter< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < grid.getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " grid.getDimensions() = " << grid.getDimensions() );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), grid.getDimensions(), "wrong coordinates" );
          //const CoordinatesType coordinates = entity.getCoordinates();
          //const CoordinatesType dimensions = grid.getDimensions();
@@ -142,10 +133,8 @@ class GridEntityGetter< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -171,11 +160,8 @@ class GridEntityGetter< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < grid.getDimensions() + abs( entity.getOrientation() ),
-                 std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                      << " grid.getDimensions() = " << grid.getDimensions()
-                      << " abs( entity.getOrientation() ) = " << abs( entity.getOrientation() ) );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), grid.getDimensions() + abs( entity.getOrientation() ), "wrong coordinates" );
          const CoordinatesType coordinates = entity.getCoordinates();
          const CoordinatesType dimensions = grid.getDimensions();
@@ -205,10 +191,8 @@ class GridEntityGetter< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -225,9 +209,8 @@ class GridEntityGetter< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= 0 && entity.getCoordinates() <= grid.getDimensions(),
-            std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                 << " grid.getDimensions() = " << grid.getDimensions() );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), grid.getDimensions(), "wrong coordinates" );
          const CoordinatesType coordinates = entity.getCoordinates();
          const CoordinatesType dimensions = grid.getDimensions();
@@ -258,10 +241,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -278,10 +259,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < grid.getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " grid.getDimensions() = " << grid.getDimensions() );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), grid.getDimensions(), "wrong coordinates" );
          const CoordinatesType coordinates = entity.getCoordinates();
          const CoordinatesType dimensions = grid.getDimensions();
@@ -310,10 +289,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -354,11 +331,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < grid.getDimensions() + abs( entity.getOrientation() ),
-                 std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                      << " grid.getDimensions() = " << grid.getDimensions()
-                      << " abs( entity.getOrientation() ) = " << abs( entity.getOrientation() ) );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), grid.getDimensions() + abs( entity.getOrientation() ), "wrong coordinates" );
          const CoordinatesType coordinates = entity.getCoordinates();
          const CoordinatesType dimensions = grid.getDimensions();
@@ -400,10 +374,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -447,12 +419,10 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < grid.getDimensions() +
-                       CoordinatesType( 1, 1, 1 ) - entity.getBasis(),
-            std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                 << " grid.getDimensions() = " << grid.getDimensions()
-                 << " CoordinatesType( 1, 1, 1 ) - entity.getBasis() = " << CoordinatesType( 1, 1, 1 ) - entity.getBasis() );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(),
+                        grid.getDimensions() + CoordinatesType( 1, 1, 1 ) - entity.getBasis(),
+                        "wrong coordinates" );
          const CoordinatesType coordinates = entity.getCoordinates();
          const CoordinatesType dimensions = grid.getDimensions();
@@ -490,10 +460,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >
       static GridEntity getEntity( const GridType& grid,
                                    const IndexType& index )
-         TNL_ASSERT( index >= 0 && index < grid.template getEntitiesCount< GridEntity >(),
-           std::cerr << " index = " << index
-                << " grid.getEntitiesCount<>() = " << grid.template getEntitiesCount< GridEntity >()
-                << " entityDimension = " << entityDimension );
+         TNL_ASSERT_GE( index, 0, "Index must be non-negative." );
+         TNL_ASSERT_LT( index, grid.template getEntitiesCount< GridEntity >(), "Index is out of bounds." );
          const CoordinatesType dimensions = grid.getDimensions();
@@ -512,9 +480,8 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >
       static IndexType getEntityIndex( const GridType& grid,
                                        const GridEntity& entity )
-         TNL_ASSERT( entity.getCoordinates() >= 0 && entity.getCoordinates() <= grid.getDimensions(),
-            std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                 << " grid.getDimensions() = " << grid.getDimensions() );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), grid.getDimensions(), "wrong coordinates" );
          const CoordinatesType coordinates = entity.getCoordinates();
          const CoordinatesType dimensions = grid.getDimensions();
@@ -527,4 +494,3 @@ class GridEntityGetter< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/GridEntityTopology.h b/src/TNL/Meshes/GridDetails/GridEntityTopology.h
index 716b2dcc2d662174dbf2006c450993adcce81bb4..9a9ba9e317c23926ce171063162686c8c57e954d 100644
--- a/src/TNL/Meshes/GridDetails/GridEntityTopology.h
+++ b/src/TNL/Meshes/GridDetails/GridEntityTopology.h
@@ -25,8 +25,8 @@ class GridEntityTopology
       typedef Grid GridType;
       // TODO: restore when CUDA allows it
-      //static const int meshDimension = GridType::Dimension;
-      enum { meshDimension = GridType::Dimension };
+      //static const int meshDimension = GridType::getMeshDimension();
+      enum { meshDimension = GridType::getMeshDimension() };
       static const int entityDimension = EntityDimension;
diff --git a/src/TNL/Meshes/GridDetails/GridEntity_impl.h b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
index 842ac4ad69f150adb962843db4aeb8f4b58e8441..8703c064fd6e2271ea3705ddb4355261edf14cd9 100644
--- a/src/TNL/Meshes/GridDetails/GridEntity_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridEntity_impl.h
@@ -44,7 +44,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid )
   coordinates( 0 ),
   orientation( 0 ),
   basis( 0 ),
-  neighbourEntitiesStorage( *this )
+  neighborEntitiesStorage( *this )
@@ -65,7 +65,7 @@ GridEntity( const Meshes::Grid< Dimension, Real, Device, Index >& grid,
   coordinates( coordinates ),
   orientation( orientation ),
   basis( basis ),
-  neighbourEntitiesStorage( *this )
+  neighborEntitiesStorage( *this )
@@ -123,7 +123,7 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Con
    this->entityIndex = this->grid.getEntityIndex( *this );
-   this->neighbourEntitiesStorage.refresh( this->grid, this->entityIndex );
+   this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
 template< int Dimension,
@@ -138,14 +138,12 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Con
 getIndex() const
    typedef Meshes::Grid< Dimension, Real, Device, Index > GridType;
-   typedef typename GridType::template MeshEntity< EntityDimension > EntityType;
-   TNL_ASSERT( this->entityIndex >= 0 &&
-              this-> entityIndex < grid.template getEntitiesCount< EntityType >(),
-              std::cerr << "this->entityIndex = " << this->entityIndex
-                   << " grid.template getEntitiesCount< EntityDimension >() = " << grid.template getEntitiesCount< EntityType >() );
-   TNL_ASSERT( this->entityIndex == grid.getEntityIndex( *this ),
-              std::cerr << "this->entityIndex = " << this->entityIndex
-                   << " grid.getEntityIndex( *this ) = " << grid.getEntityIndex( *this ) );
+   typedef typename GridType::template EntityType< EntityDimension > EntityType;
+   TNL_ASSERT_GE( this->entityIndex, 0, "Entity index is not non-negative." );
+   TNL_ASSERT_LT( this->entityIndex, grid.template getEntitiesCount< EntityDimension >(),
+                  "Entity index is out of bounds." );
+   TNL_ASSERT_EQ( this->entityIndex, grid.getEntityIndex( *this ),
+                  "Wrong value of stored index." );
    return this->entityIndex;
@@ -213,13 +211,13 @@ template< int Dimension,
           typename Index,
           int EntityDimension,
           typename Config >
-   template< int NeighbourEntityDimension >
+   template< int NeighborEntityDimension >
 __cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::template NeighbourEntities< NeighbourEntityDimension >&
+const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::template NeighborEntities< NeighborEntityDimension >&
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimension, Config >::
-getNeighbourEntities() const
+getNeighborEntities() const
-   return neighbourEntitiesStorage.template getNeighbourEntities< NeighbourEntityDimension >();
+   return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >();
 template< int Dimension,
@@ -301,7 +299,7 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:
 GridEntity( const GridType& grid )
 : grid( grid ),
   entityIndex( -1 ),
-  neighbourEntitiesStorage( *this )
+  neighborEntitiesStorage( *this )
    this->coordinates = CoordinatesType( ( Index ) 0 );
@@ -320,7 +318,7 @@ GridEntity( const GridType& grid,
 : grid( grid ),
   entityIndex( -1 ),
   coordinates( coordinates ),
-  neighbourEntitiesStorage( *this )
+  neighborEntitiesStorage( *this )
@@ -374,7 +372,7 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >:
    this->entityIndex = this->grid.getEntityIndex( *this );
-   this->neighbourEntitiesStorage.refresh( this->grid, this->entityIndex );
+   this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
 template< int Dimension,
@@ -387,13 +385,11 @@ Index
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::
 getIndex() const
-   TNL_ASSERT( this->entityIndex >= 0 &&
-              this-> entityIndex < grid.template getEntitiesCount< ThisType >(),
-              std::cerr << "this->entityIndex = " << this->entityIndex
-                   << " grid.template getEntitiesCount< Dimension >() = " << grid.template getEntitiesCount< ThisType >() );
-   TNL_ASSERT( this->entityIndex == grid.getEntityIndex( *this ),
-              std::cerr << "this->index = " << this->entityIndex
-                   << " grid.getEntityIndex( *this ) = " << grid.getEntityIndex( *this ) );
+   TNL_ASSERT_GE( this->entityIndex, 0, "Entity index is not non-negative." );
+   TNL_ASSERT_LT( this->entityIndex, grid.template getEntitiesCount< Dimension >(),
+                  "Entity index is out of bounds." );
+   TNL_ASSERT_EQ( this->entityIndex, grid.getEntityIndex( *this ),
+                  "Wrong value of stored index." );
    return this->entityIndex;
@@ -428,13 +424,13 @@ template< int Dimension,
           typename Device,
           typename Index,
           typename Config >
-   template< int NeighbourEntityDimension >
+   template< int NeighborEntityDimension >
 __cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::template NeighbourEntities< NeighbourEntityDimension >&
+const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::template NeighborEntities< NeighborEntityDimension >&
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Config >::
-getNeighbourEntities() const
+getNeighborEntities() const
-   return neighbourEntitiesStorage.template getNeighbourEntities< NeighbourEntityDimension >();
+   return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >();
 template< int Dimension,
@@ -518,7 +514,7 @@ GridEntity( const GridType& grid )
  : grid( grid ),
    entityIndex( -1 ),
    coordinates( 0 ),
-   neighbourEntitiesStorage( *this )
+   neighborEntitiesStorage( *this )
@@ -536,7 +532,7 @@ GridEntity( const GridType& grid,
 : grid( grid ),
   entityIndex( -1 ),
   coordinates( coordinates ),
-  neighbourEntitiesStorage( *this )
+  neighborEntitiesStorage( *this )
@@ -590,7 +586,7 @@ GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
    this->entityIndex = this->grid.getEntityIndex( *this );
-   this->neighbourEntitiesStorage.refresh( this->grid, this->entityIndex );
+   this->neighborEntitiesStorage.refresh( this->grid, this->entityIndex );
 template< int Dimension,
@@ -605,13 +601,11 @@ getIndex() const
    typedef Meshes::Grid< Dimension, Real, Device, Index > GridType;
    typedef typename GridType::Vertex Vertex;
-   TNL_ASSERT( this->entityIndex >= 0 &&
-              this-> entityIndex < grid.template getEntitiesCount< Vertex >(),
-              std::cerr << "this->entityIndex = " << this->entityIndex
-                   << " grid.template getEntitiesCount< 0 >() = " << grid.template getEntitiesCount< Vertex >() );
-   TNL_ASSERT( this->entityIndex == grid.getEntityIndex( *this ),
-              std::cerr << "this->entityIndex = " << this->entityIndex
-                   << " grid.getEntityIndex( *this ) = " << grid.getEntityIndex( *this ) );
+   TNL_ASSERT_GE( this->entityIndex, 0, "Entity index is not non-negative." );
+   TNL_ASSERT_LT( this->entityIndex, grid.template getEntitiesCount< 0 >(),
+                  "Entity index is out of bounds." );
+   TNL_ASSERT_EQ( this->entityIndex, grid.getEntityIndex( *this ),
+                  "Wrong value of stored index." );
    return this->entityIndex;
@@ -646,13 +640,13 @@ template< int Dimension,
           typename Device,
           typename Index,
           typename Config >
-   template< int NeighbourEntityDimension >
+   template< int NeighborEntityDimension >
 __cuda_callable__ inline
-const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::template NeighbourEntities< NeighbourEntityDimension >&
+const typename GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::template NeighborEntities< NeighborEntityDimension >&
 GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >::
-getNeighbourEntities() const
+getNeighborEntities() const
-   return neighbourEntitiesStorage.template getNeighbourEntities< NeighbourEntityDimension >();
+   return neighborEntitiesStorage.template getNeighborEntities< NeighborEntityDimension >();
 template< int Dimension,
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser.h b/src/TNL/Meshes/GridDetails/GridTraverser.h
index 8365f41d7944e04291f13a849afb57832cbc2274..7e821d714bcdb3b6081ac59ae160ab898434979b 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser.h
+++ b/src/TNL/Meshes/GridDetails/GridTraverser.h
@@ -85,6 +85,38 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Cuda, Index > >
          const int& stream = 0 );
+ * 1D grid, Devices::MIC
+ */
+template< typename Real,
+          typename Index >
+class GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >
+   public:
+      typedef Meshes::Grid< 1, Real, Devices::MIC, Index > GridType;
+      typedef SharedPointer< GridType > GridPointer;
+      typedef Real RealType;
+      typedef Devices::MIC DeviceType;
+      typedef Index IndexType;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      template<
+         typename GridEntity,
+         typename EntitiesProcessor,
+         typename UserData,
+         bool processOnlyBoundaryEntities  >
+      static void
+      processEntities(
+         const GridPointer& gridPointer,
+         const CoordinatesType& begin,
+         const CoordinatesType& end,
+         SharedPointer< UserData, DeviceType >& userData,
+         const int& stream = 0 );
  * 2D grid, Devices::Host
@@ -161,6 +193,44 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > >
          const GridEntityParameters&... gridEntityParameters );
+ * 2D grid, Devices::MIC
+ */
+template< typename Real,
+          typename Index >
+class GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >
+   public:
+      typedef Meshes::Grid< 2, Real, Devices::MIC, Index > GridType;
+      typedef SharedPointer< GridType > GridPointer;
+      typedef Real RealType;
+      typedef Devices::MIC DeviceType;
+      typedef Index IndexType;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      template<
+         typename GridEntity,
+         typename EntitiesProcessor,
+         typename UserData,
+         bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary = 1,
+         int YOrthogonalBoundary = 1,
+         typename... GridEntityParameters >
+      static void
+      processEntities(
+         const GridPointer& gridPointer,
+         const CoordinatesType& begin,
+         const CoordinatesType& end,
+         SharedPointer< UserData, DeviceType >& userData,
+         // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
+//         const int& stream = 0,
+         const int& stream,
+         // gridEntityParameters are passed to GridEntity's constructor
+         // (i.e. orientation and basis for faces)
+         const GridEntityParameters&... gridEntityParameters );
  * 3D grid, Devices::Host
@@ -239,6 +309,45 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Cuda, Index > >
          const GridEntityParameters&... gridEntityParameters );
+ * 3D grid, Devices::Cuda
+ */
+template< typename Real,
+          typename Index >
+class GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >
+   public:
+      typedef Meshes::Grid< 3, Real, Devices::MIC, Index > GridType;
+      typedef SharedPointer< GridType > GridPointer;
+      typedef Real RealType;
+      typedef Devices::MIC DeviceType;
+      typedef Index IndexType;
+      typedef typename GridType::CoordinatesType CoordinatesType;
+      template<
+         typename GridEntity,
+         typename EntitiesProcessor,
+         typename UserData,
+         bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary = 1,
+         int YOrthogonalBoundary = 1,
+         int ZOrthogonalBoundary = 1,
+         typename... GridEntityParameters >
+      static void
+      processEntities(
+         const GridPointer& gridPointer,
+         const CoordinatesType& begin,
+         const CoordinatesType& end,
+         SharedPointer< UserData, DeviceType >& userData,
+         // FIXME: hack around nvcc bug (error: default argument not at end of parameter list)
+//         const int& stream = 0,
+         const int& stream,
+         // gridEntityParameters are passed to GridEntity's constructor
+         // (i.e. orientation and basis for faces and edges)
+         const GridEntityParameters&... gridEntityParameters );
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_impl.h b/src/TNL/Meshes/GridDetails/GridTraverser_impl.h
index 2adfb1b4cd34988f60760ff77635b51097226194..4443e161c3d3296f7a7924d927f00a3581248cec 100644
--- a/src/TNL/Meshes/GridDetails/GridTraverser_impl.h
+++ b/src/TNL/Meshes/GridDetails/GridTraverser_impl.h
@@ -8,8 +8,14 @@
 /* See Copyright Notice in tnl/Copyright */
+#include <TNL/Devices/MIC.h>
 #pragma once
+#include "GridTraverser.h"
+#include <TNL/Exceptions/CudaSupportMissing.h>
 namespace TNL {
 namespace Meshes {
@@ -193,11 +199,75 @@ processEntities(
    if( stream == 0 )
       cudaStreamSynchronize( s );
-      checkCudaDevice;
+   throw Exceptions::CudaSupportMissing();
+ * 1D traverser, MIC
+ */
+template< typename Real,
+          typename Index >
+   template<
+      typename GridEntity,
+      typename EntitiesProcessor,
+      typename UserData,
+      bool processOnlyBoundaryEntities >
+GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >::
+   const GridPointer& gridPointer,
+   const CoordinatesType& begin,
+   const CoordinatesType& end,
+   SharedPointer< UserData, DeviceType >& userDataPointer,
+   const int& stream )
+    std::cout << "Not Implemented yet Grid Traverser <1, Real, Device::MIC>" << std::endl;
+   auto& pool = CudaStreamPool::getInstance();
+   const cudaStream_t& s = pool.getStream( stream );
+   Devices::Cuda::synchronizeDevice();
+   if( processOnlyBoundaryEntities )
+   {
+      dim3 cudaBlockSize( 2 );
+      dim3 cudaBlocks( 1 );
+      GridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
+            <<< cudaBlocks, cudaBlockSize, 0, s >>>
+            ( &gridPointer.template getData< Devices::Cuda >(),
+              &userDataPointer.template modifyData< Devices::Cuda >(),
+              begin,
+              end );
+   }
+   else
+   {
+      dim3 cudaBlockSize( 256 );
+      dim3 cudaBlocks;
+      cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
+      const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
+      for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+         GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor >
+            <<< cudaBlocks, cudaBlockSize, 0, s >>>
+            ( &gridPointer.template getData< Devices::Cuda >(),
+              &userDataPointer.template modifyData< Devices::Cuda >(),
+              begin,
+              end,
+              gridXIdx );
+   }
+   // only launches into the stream 0 are synchronized
+   if( stream == 0 )
+   {
+      cudaStreamSynchronize( s );
+      checkCudaDevice;
+   }
  * 2D traverser, host
@@ -499,7 +569,7 @@ processEntities(
       cudaStreamSynchronize( s2 );
       cudaStreamSynchronize( s3 );
       cudaStreamSynchronize( s4 );
-      checkCudaDevice;
@@ -533,9 +603,109 @@ processEntities(
       if( stream == 0 )
          cudaStreamSynchronize( s );
-         checkCudaDevice;
+   throw Exceptions::CudaSupportMissing();
+ * 2D traverser, MIC
+ */
+template< typename Real,
+          typename Index >
+   template<
+      typename GridEntity,
+      typename EntitiesProcessor,
+      typename UserData,
+      bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary,
+         int YOrthogonalBoundary,
+      typename... GridEntityParameters >
+GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >::
+   const GridPointer& gridPointer,
+   const CoordinatesType& begin,
+   const CoordinatesType& end,
+   SharedPointer< UserData, DeviceType >& userDataPointer,
+   const int& stream,
+   const GridEntityParameters&... gridEntityParameters )
+#ifdef HAVE_MIC   
+   Devices::MIC::synchronizeDevice();
+    //TOHLE JE PRUSER -- nemim poslat vypustku -- 
+    //GridEntity entity( gridPointer.template getData< Devices::MIC >(), begin, gridEntityParameters... );
+    Devices::MICHider<const GridType> hMicGrid;
+    hMicGrid.pointer=& gridPointer.template getData< Devices::MIC >();
+    Devices::MICHider<UserData> hMicUserData;
+    hMicUserData.pointer=& userDataPointer.template modifyData<Devices::MIC>();
+    TNLMICSTRUCT(begin, const CoordinatesType);
+    TNLMICSTRUCT(end, const CoordinatesType);
+    #pragma offload target(mic) in(sbegin,send,hMicUserData,hMicGrid)  
+    {
+        #pragma omp parallel firstprivate( sbegin, send )
+        {     
+            TNLMICSTRUCTUSE(begin, const CoordinatesType);
+            TNLMICSTRUCTUSE(end, const CoordinatesType);    
+            GridEntity entity( *(hMicGrid.pointer), *(kernelbegin) );
+            if( processOnlyBoundaryEntities )
+             {      
+               if( YOrthogonalBoundary )
+                  #pragma omp for
+                  for( auto k = kernelbegin->x();
+                       k <= kernelend->x();
+                       k ++ )
+                  {
+                     entity.getCoordinates().x() = k;
+                     entity.getCoordinates().y() = kernelbegin->y();
+                     entity.refresh();
+                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
+                     entity.getCoordinates().y() = kernelend->y();
+                     entity.refresh();
+                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
+                  }
+               if( XOrthogonalBoundary )
+                  #pragma omp for
+                  for( auto k = kernelbegin->y();
+                       k <= kernelend->y();
+                       k ++ )
+                  {
+                     entity.getCoordinates().y() = k;
+                     entity.getCoordinates().x() = kernelbegin->x();
+                     entity.refresh();
+                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
+                     entity.getCoordinates().x() = kernelend->x();
+                     entity.refresh();
+                     EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
+                  }
+             }
+            else
+            {
+                  #pragma omp for
+                  for( IndexType y = kernelbegin->y(); y <= kernelend->y(); y ++ )
+                     for( IndexType x = kernelbegin->x(); x <= kernelend->x(); x ++ )
+                     {
+                        // std::cerr << x << "   " <<y << std::endl;
+                        entity.getCoordinates().x() = x;
+                        entity.getCoordinates().y() = y;
+                        entity.refresh();
+                        EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity );
+                     }      
+             }
+        }
+    }
@@ -940,7 +1110,7 @@ processEntities(
       cudaStreamSynchronize( s4 );
       cudaStreamSynchronize( s5 );
       cudaStreamSynchronize( s6 );      
-      checkCudaDevice;
@@ -976,11 +1146,76 @@ processEntities(
       if( stream == 0 )
          cudaStreamSynchronize( s );
-         checkCudaDevice;
+   throw Exceptions::CudaSupportMissing();
+ * 3D traverser, MIC
+ */
+template< typename Real,
+          typename Index >
+   template<
+      typename GridEntity,
+      typename EntitiesProcessor,
+      typename UserData,
+      bool processOnlyBoundaryEntities,
+         int XOrthogonalBoundary,
+         int YOrthogonalBoundary,
+         int ZOrthogonalBoundary,
+      typename... GridEntityParameters >
+GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >::
+   const GridPointer& gridPointer,
+   const CoordinatesType& begin,
+   const CoordinatesType& end,
+   SharedPointer< UserData, DeviceType >& userDataPointer,
+   const int& stream,
+   const GridEntityParameters&... gridEntityParameters )
+    std::cout << "Not Implemented yet Grid Traverser <3, Real, Device::MIC>" << std::endl;
+/* HAVE_CUDA   
+   dim3 cudaBlockSize( 8, 8, 8 );
+   dim3 cudaBlocks;
+   cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x );
+   cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y );
+   cudaBlocks.z = Devices::Cuda::getNumberOfBlocks( end.z() - begin.z() + 1, cudaBlockSize.z );
+   const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x );
+   const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y );
+   const IndexType cudaZGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.z );
+   auto& pool = CudaStreamPool::getInstance();
+   const cudaStream_t& s = pool.getStream( stream );
+   Devices::Cuda::synchronizeDevice();
+   for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ )
+      for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
+         for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ )
+            GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
+               <<< cudaBlocks, cudaBlockSize, 0, s >>>
+               ( &gridPointer.template getData< Devices::Cuda >(),
+                 &userDataPointer.template modifyData< Devices::Cuda >(),
+                 begin,
+                 end,
+                 gridXIdx,
+                 gridYIdx,
+                 gridZIdx,
+                 gridEntityParameters... );
+   // only launches into the stream 0 are synchronized
+   if( stream == 0 )
+   {
+      cudaStreamSynchronize( s );
+      checkCudaDevice;
+   }
+ */
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
new file mode 100644
index 0000000000000000000000000000000000000000..c8f3a999b90a08e02b9bcb2cfb29b0270476f046
--- /dev/null
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h
@@ -0,0 +1,173 @@
+                          NeighborGridEntitiesStorage.h  -  description
+                             -------------------
+    begin                : Dec 18, 2015
+    copyright            : (C) 2015 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Meshes/MeshDimensionTag.h>
+#include <TNL/Meshes/GridEntityConfig.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
+namespace TNL {
+namespace Meshes {
+template< typename GridEntity,
+          int NeighborEntityDimension,
+          typename GridEntityConfig,
+          bool storage = GridEntityConfig::template neighborEntityStorage< GridEntity >( NeighborEntityDimension ) >
+class NeighborGridEntityLayer{};   
+template< typename GridEntity,
+          int NeighborEntityDimension,
+          typename GridEntityConfig >
+class NeighborGridEntityLayer< GridEntity, NeighborEntityDimension, GridEntityConfig, true >
+: public NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig >
+   public:
+      typedef NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig > BaseType;
+      typedef NeighborGridEntityGetter< GridEntity, NeighborEntityDimension > NeighborEntityGetterType;
+      using BaseType::getNeighborEntities;
+      __cuda_callable__
+      NeighborGridEntityLayer( const GridEntity& entity )
+      : BaseType( entity ),
+        neighborEntities( entity )
+      {}
+      __cuda_callable__
+      const NeighborEntityGetterType& getNeighborEntities( const MeshDimensionTag< NeighborEntityDimension>& tag ) const
+      {
+         return this->neighborEntities;
+      }
+      __cuda_callable__
+      void refresh( const typename GridEntity::GridType& grid,
+                    const typename GridEntity::GridType::IndexType& entityIndex )
+      {
+         BaseType::refresh( grid, entityIndex );
+         neighborEntities.refresh( grid, entityIndex );
+      }
+   protected:
+      NeighborEntityGetterType neighborEntities;
+template< typename GridEntity,
+          typename GridEntityConfig >
+class NeighborGridEntityLayer< GridEntity, 0, GridEntityConfig, true >
+   public:
+      typedef NeighborGridEntityGetter< GridEntity, 0 > NeighborEntityGetterType;
+      __cuda_callable__
+      NeighborGridEntityLayer( const GridEntity& entity )
+      : neighborEntities( entity )
+      {}
+      __cuda_callable__
+      const NeighborEntityGetterType& getNeighborEntities( const MeshDimensionTag< 0 >& tag ) const
+      {
+         return this->neighborEntities;
+      }
+      __cuda_callable__
+      void refresh( const typename GridEntity::GridType& grid,
+                    const typename GridEntity::GridType::IndexType& entityIndex )
+      {
+         neighborEntities.refresh( grid, entityIndex );
+      }
+   protected:
+      NeighborEntityGetterType neighborEntities;
+template< typename GridEntity,
+          int NeighborEntityDimension,
+          typename GridEntityConfig >
+class NeighborGridEntityLayer< GridEntity, NeighborEntityDimension, GridEntityConfig, false >
+: public NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig >
+   public:
+      typedef NeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1, GridEntityConfig > BaseType;      
+      typedef NeighborGridEntityGetter< GridEntity, NeighborEntityDimension > NeighborEntityGetterType;
+      using BaseType::getNeighborEntities;
+      __cuda_callable__
+      NeighborGridEntityLayer( const GridEntity& entity )
+      : BaseType( entity )
+      {}
+      __cuda_callable__
+      const NeighborEntityGetterType& getNeighborEntities( const MeshDimensionTag< NeighborEntityDimension >& tag ) const {}
+      __cuda_callable__
+      void refresh( const typename GridEntity::GridType& grid,
+                    const typename GridEntity::GridType::IndexType& entityIndex ) {}
+template< typename GridEntity,
+          typename GridEntityConfig >
+class NeighborGridEntityLayer< GridEntity, 0, GridEntityConfig, false >
+   public:
+      typedef NeighborGridEntityGetter< GridEntity, 0 > NeighborEntityGetterType;
+      __cuda_callable__
+      NeighborGridEntityLayer( const GridEntity& entity ){}
+      __cuda_callable__
+      const NeighborEntityGetterType& getNeighborEntities( const MeshDimensionTag< 0 >& tag ) const {}
+      __cuda_callable__
+      void refresh( const typename GridEntity::GridType& grid,
+                    const typename GridEntity::GridType::IndexType& entityIndex ) {}
+template< typename GridEntity,
+          typename GridEntityConfig >
+class NeighborGridEntitiesStorage
+: public NeighborGridEntityLayer< GridEntity, GridEntity::getMeshDimension(), GridEntityConfig >
+   typedef NeighborGridEntityLayer< GridEntity, GridEntity::getMeshDimension(), GridEntityConfig > BaseType;
+   public:
+      using BaseType::getNeighborEntities;
+      using BaseType::refresh;
+      __cuda_callable__
+      NeighborGridEntitiesStorage( const GridEntity& entity )
+      : BaseType( entity )
+      {}
+      template< int EntityDimension >
+      __cuda_callable__
+      const NeighborGridEntityGetter< GridEntity, EntityDimension >&
+      getNeighborEntities() const
+      {
+         return BaseType::getNeighborEntities( MeshDimensionTag< EntityDimension >() );
+      }
+} // namespace Meshes
+} // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h
similarity index 77%
rename from src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h
rename to src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h
index ec6cd3f297530a50c5f6f853eccde3efa65af666..84a9c56d9389f31c013206c10f63fceb81ec2e0c 100644
--- a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h
@@ -1,5 +1,5 @@
-                          NeighbourGridEntityGetter.h  -  description
+                          NeighborGridEntityGetter.h  -  description
     begin                : Nov 23, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -11,23 +11,24 @@
 #pragma once
 #include <TNL/Assert.h>
+#include <TNL/Devices/Cuda.h>
 #include <TNL/Meshes/GridEntityConfig.h>
 namespace TNL {
 namespace Meshes {
 template< typename GridEntity,
-          int NeighbourEntityDimension,
+          int NeighborEntityDimension,
           typename EntityStencilTag =
-            GridEntityStencilStorageTag< GridEntity::ConfigType::template neighbourEntityStorage< GridEntity >( NeighbourEntityDimension ) > >
-class NeighbourGridEntityGetter
+            GridEntityStencilStorageTag< GridEntity::ConfigType::template neighborEntityStorage< GridEntity >( NeighborEntityDimension ) > >
+class NeighborGridEntityGetter
       // TODO: not all specializations are implemented yet
-      NeighbourGridEntityGetter( const GridEntity& entity )
+      NeighborGridEntityGetter( const GridEntity& entity )
          //TNL_ASSERT( false, );
diff --git a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter1D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h
similarity index 63%
rename from src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter1D_impl.h
rename to src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h
index a8e91bcefc73ef9e4b5eaaf41bf1bf4f8e6f0595..14143220eb451521afb6098e67ebc9889559a68a 100644
--- a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h
@@ -1,5 +1,5 @@
-                          NeighbourGridEntityGetter1D_impl.h  -  description
+                          NeighborGridEntityGetter1D_impl.h  -  description
     begin                : Nov 23, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -10,7 +10,7 @@
 #pragma once
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 #include <TNL/Meshes/GridDetails/Grid1D.h>
 #include <TNL/Meshes/GridDetails/Grid2D.h>
 #include <TNL/Meshes/GridDetails/Grid3D.h>
@@ -21,7 +21,7 @@ namespace Meshes {
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       1         |              1            |       ----        |
  * +-----------------+---------------------------+-------------------+
@@ -30,7 +30,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 1, Real, Device, Index >, 1, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -38,46 +38,40 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 1;
-      static const int NeighbourEntityDimension = 1;
+      static const int NeighborEntityDimension = 1;
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int step >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( this->entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    this->entity.getCoordinates() < this->entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << this->entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( step ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates() + CoordinatesType( step ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + step ) );
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + step ) );
       template< int step >
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( step ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates() + CoordinatesType( step ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
@@ -97,7 +91,7 @@ class NeighbourGridEntityGetter<
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       1         |              1            |  Cross/Full       |
  * +-----------------+---------------------------+-------------------+
@@ -107,7 +101,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 1, Real, Device, Index >, 1, Config >,
    StencilStorage >
@@ -115,49 +109,43 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 1;
-      static const int NeighbourEntityDimension = 1;
+      static const int NeighborEntityDimension = 1;
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
-      typedef NeighbourGridEntityGetter< GridEntityType, 1, StencilStorage > ThisType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
+      typedef NeighborGridEntityGetter< GridEntityType, 1, StencilStorage > ThisType;
       static const int stencilSize = Config::getStencilSize();
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int step >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( this->entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    this->entity.getCoordinates() < this->entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << this->entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( step ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates() + CoordinatesType( step ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->entity.getMesh(), CoordinatesType( entity.getCoordinates().x() + step ) );
+         return NeighborGridEntityType( this->entity.getMesh(), CoordinatesType( entity.getCoordinates().x() + step ) );
       template< int step >
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( step ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates() + CoordinatesType( step ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
@@ -179,9 +167,9 @@ class NeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencil[ index + stencilSize ] = entityIndex + index;
+               neighborEntityGetter.stencil[ index + stencilSize ] = entityIndex + index;
@@ -202,7 +190,7 @@ class NeighbourGridEntityGetter<
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       1         |              0            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -211,7 +199,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 1, Real, Device, Index >, 1, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -219,46 +207,40 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 1;
-      static const int NeighbourEntityDimension = 0;
+      static const int NeighborEntityDimension = 0;
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int step >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step + ( step < 0 ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates().x() + step + ( step < 0 ) <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + step + ( step < 0 ) ) );
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + step + ( step < 0 ) ) );
       template< int step >
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step + ( step < 0 ) >= CoordinatesType( 0 ).x() &&
                     entity.getCoordinates().x() + step + ( step < 0 ) <= entity.getMesh().getDimensions().x(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
@@ -274,13 +256,13 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       0         |              1            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -290,7 +272,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 1, Real, Device, Index >, 0, Config >,
    StencilStorage > //GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -298,17 +280,17 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 0;
-      static const int NeighbourEntityDimension = 1;
+      static const int NeighborEntityDimension = 1;
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
@@ -316,30 +298,24 @@ class NeighbourGridEntityGetter<
       template< int step >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step - ( step > 0 ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates().x() + step - ( step > 0 ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + step - ( step > 0 ) ) );
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + step - ( step > 0 ) ) );
       template< int step >
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step - ( step > 0 ) >= 0 &&
                     entity.getCoordinates().x() + step - ( step > 0 ) < entity.getMesh().getDimensions().x(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
@@ -358,7 +334,7 @@ class NeighbourGridEntityGetter<
 /****   TODO: Implement this, now it is only a copy of specialization for none stencil storage
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       0         |              1            |       Cross       |
  * +-----------------+---------------------------+-------------------+
@@ -367,7 +343,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 1, Real, Device, Index >, 0, Config >,
    GridEntityStencilStorageTag< GridEntityCrossStencil > >
@@ -375,47 +351,41 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 0;
-      static const int NeighbourEntityDimension = 1;
+      static const int NeighborEntityDimension = 1;
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int step >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step - ( step > 0 ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates().x() + step - ( step > 0 ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + step - ( step > 0 ) ) );
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + step - ( step > 0 ) ) );
       template< int step >
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step - ( step > 0 ) >= CoordinatesType( 0 ) &&
                     entity.getCoordinates().x() + step - ( step > 0 ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
@@ -435,7 +405,7 @@ class NeighbourGridEntityGetter<
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       0         |              0            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -444,7 +414,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 1, Real, Device, Index >, 0, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -452,46 +422,40 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 0;
-      static const int NeighbourEntityDimension = 0;
+      static const int NeighborEntityDimension = 0;
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int step >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step >= CoordinatesType( 0 ) &&
                     entity.getCoordinates().x() + step <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + step ) );
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + step ) );
       template< int step >
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates().x() + step >= CoordinatesType( 0 ) &&
                     entity.getCoordinates().x() + step <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
@@ -511,5 +475,4 @@ class NeighbourGridEntityGetter<
 } // namespace Meshes
-} // namespace TNL
+} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter2D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h
similarity index 70%
rename from src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter2D_impl.h
rename to src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h
index 2ccb4568fa73c30356d306f1a0bf98075481d3aa..b760748cd23a84552891d095f9053058d4b012ca 100644
--- a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h
@@ -1,5 +1,5 @@
-                          NeighbourGridEntityGetter2D_impl.h  -  description
+                          NeighborGridEntityGetter2D_impl.h  -  description
     begin                : Nov 23, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -10,7 +10,7 @@
 #pragma once
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 #include <TNL/Meshes/GridDetails/Grid1D.h>
 #include <TNL/Meshes/GridDetails/Grid2D.h>
 #include <TNL/Meshes/GridDetails/Grid3D.h>
@@ -20,7 +20,7 @@ namespace Meshes {
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       2         |              2            | No specialization |
  * +-----------------+---------------------------+-------------------+
@@ -30,7 +30,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    StencilStorage >
@@ -38,35 +38,32 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->grid,
+         return NeighborGridEntityType( this->grid,
                                          CoordinatesType( entity.getCoordinates().x() + stepX,
                                                           entity.getCoordinates().y() + stepY ) );
@@ -75,11 +72,8 @@ class NeighbourGridEntityGetter<
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
@@ -95,12 +89,12 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       2         |              2            |       Cross       |
  * +-----------------+---------------------------+-------------------+
@@ -109,7 +103,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    GridEntityStencilStorageTag< GridEntityCrossStencil > >
@@ -117,40 +111,37 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       typedef GridEntityStencilStorageTag< GridEntityCrossStencil > StencilStorage;
-      typedef NeighbourGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType;
+      typedef NeighborGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType;
       static const int stencilSize = Config::getStencilSize();
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-            return NeighbourGridEntityType( this->entity.getMesh(),
+            return NeighborGridEntityType( this->entity.getMesh(),
                                             CoordinatesType( entity.getCoordinates().x() + stepX,
                                                              entity.getCoordinates().y() + stepY ) );
@@ -159,11 +150,8 @@ class NeighbourGridEntityGetter<
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
@@ -189,9 +177,9 @@ class NeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
+               neighborEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
@@ -201,10 +189,10 @@ class NeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilY[ index + stencilSize ] =
-                  entityIndex + index * neighbourEntityGetter.entity.getMesh().getDimensions().x();
+               neighborEntityGetter.stencilY[ index + stencilSize ] =
+                  entityIndex + index * neighborEntityGetter.entity.getMesh().getDimensions().x();
@@ -226,12 +214,12 @@ class NeighbourGridEntityGetter<
       IndexType stencilX[ 2 * stencilSize + 1 ];
       IndexType stencilY[ 2 * stencilSize + 1 ];
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       2         |              1            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -241,7 +229,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    StencilStorage >
@@ -249,33 +237,29 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 1;
+      static const int NeighborEntityDimension = 1;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       typedef typename GridEntityType::EntityOrientationType EntityOrientationType;
       typedef typename GridEntityType::EntityBasisType EntityBasisType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( ! stepX + ! stepY == 1,
-                    std::cerr << "Only one of the steps can be non-zero: stepX = " << stepX << " stepY = " << stepY );
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         static_assert( ! stepX + ! stepY == 1, "Only one of the steps can be non-zero." );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX + ( stepX < 0 ),
                                         stepY + ( stepY < 0 ) ) >= CoordinatesType( 0, 0 ) &&
@@ -287,7 +271,7 @@ class NeighbourGridEntityGetter<
                    << entity.getCoordinates()  + CoordinatesType( stepX + ( stepX < 0 ), stepY + ( stepY < 0 ) )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->entity.getMesh(),
+         return NeighborGridEntityType( this->entity.getMesh(),
                                          CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
                                                           entity.getCoordinates().y() + stepY + ( stepY < 0 ) ),
                                          EntityOrientationType( stepX ? (stepX > 0 ? 1 : -1) : 0,
@@ -312,7 +296,7 @@ class NeighbourGridEntityGetter<
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       2         |            0              |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -322,7 +306,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    StencilStorage >
@@ -330,31 +314,28 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 0;
+      static const int NeighborEntityDimension = 0;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
          TNL_ASSERT( stepX != 0 && stepY != 0,
                     std::cerr << " stepX = " << stepX << " stepY = " << stepY );
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX + ( stepX < 0 ), stepY + ( stepY < 0 ) ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() +
@@ -365,7 +346,7 @@ class NeighbourGridEntityGetter<
                    << " entity.getMesh().getDimensions() + CoordinatesType( sign( stepX ), sign( stepY ) ) = "
                    << entity.getMesh().getDimensions()  + CoordinatesType( sign( stepX ), sign( stepY ) )
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->grid,
+         return NeighborGridEntityType( this->grid,
                                          CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
                                                           entity.getCoordinates().y() + stepY + ( stepY < 0 ) ) );
@@ -384,12 +365,12 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       1         |              2            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -399,7 +380,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 1, Config >,
    StencilStorage >
@@ -407,33 +388,30 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 1;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
          /*TNL_ASSERT( ( ( !! stepX ) == ( !! entity.getOrientation().x() ) ) &&
                     ( ( !! stepY ) == ( !! entity.getOrientation().y() ) ),
                     std::cerr << "( stepX, stepY ) cannot be perpendicular to entity coordinates: stepX = " << stepX << " stepY = " << stepY
                          << " entity.getOrientation() = " << entity.getOrientation() );*/
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions() + entity.getOrientation(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() + entity.getOrientation() = " << entity.getMesh().getDimensions() + entity.getOrientation()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX - ( stepX > 0 ) * ( entity.getOrientation().x() != 0.0 ),
                                         stepY - ( stepY > 0 ) * ( entity.getOrientation().y() != 0.0 ) ) >= CoordinatesType( 0, 0 ) &&
@@ -444,7 +422,7 @@ class NeighbourGridEntityGetter<
                    << entity.getCoordinates()  + CoordinatesType( stepX + ( stepX < 0 ), stepY + ( stepY < 0 ) )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->entity.getMesh(),
+         return NeighborGridEntityType( this->entity.getMesh(),
                      CoordinatesType( entity.getCoordinates().x() + stepX - ( stepX > 0 ) * ( entity.getOrientation().x() != 0.0 ),
                                       entity.getCoordinates().y() + stepY - ( stepY > 0 ) * ( entity.getOrientation().y() != 0.0 ) ) );
@@ -466,7 +444,7 @@ class NeighbourGridEntityGetter<
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       0         |              0            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -476,7 +454,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 0, Config >,
    StencilStorage >
@@ -484,35 +462,32 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 0;
-      static const int NeighbourEntityDimension = 0;
+      static const int NeighborEntityDimension = 0;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->grid,
+         return NeighborGridEntityType( this->grid,
                                          CoordinatesType( entity.getCoordinates().x() + stepX,
                                                           entity.getCoordinates().y() + stepY ) );
@@ -521,11 +496,8 @@ class NeighbourGridEntityGetter<
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
@@ -541,9 +513,8 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter3D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h
similarity index 72%
rename from src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter3D_impl.h
rename to src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h
index 682850bb1b3a442f7bf6a227418859496c690375..a7d2ab6d879cb1779b44dbe56f7f98464cd71473 100644
--- a/src/TNL/Meshes/GridDetails/NeighbourGridEntityGetter3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h
@@ -1,5 +1,5 @@
-                          NeighbourGridEntityGetter3D_impl.h  -  description
+                          NeighborGridEntityGetter3D_impl.h  -  description
     begin                : Nov 23, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -10,7 +10,7 @@
 #pragma once
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h>
 #include <TNL/Meshes/GridDetails/Grid1D.h>
 #include <TNL/Meshes/GridDetails/Grid2D.h>
 #include <TNL/Meshes/GridDetails/Grid3D.h>
@@ -21,7 +21,7 @@ namespace Meshes {
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       3         |              3            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -30,7 +30,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 3, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -38,35 +38,32 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 3;
-      static const int NeighbourEntityDimension = 3;
+      static const int NeighborEntityDimension = 3;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY ) >= CoordinatesType( 0, 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX,
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX,
                                                       entity.getCoordinates().y() + stepY,
                                                       entity.getCoordinates().z() + stepZ ) );
@@ -75,11 +72,8 @@ class NeighbourGridEntityGetter<
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) >= CoordinatesType( 0, 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY, stepZ ) = "
@@ -96,14 +90,14 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       3         |              3            |       Cross       |
  * +-----------------+---------------------------+-------------------+
@@ -112,7 +106,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 3, Config >,
    GridEntityStencilStorageTag< GridEntityCrossStencil > >
@@ -120,39 +114,36 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 3;
-      static const int NeighbourEntityDimension = 3;
+      static const int NeighborEntityDimension = 3;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       typedef GridEntityStencilStorageTag< GridEntityCrossStencil > StencilStorage;
-      typedef NeighbourGridEntityGetter< GridEntityType, 3, StencilStorage > ThisType;
+      typedef NeighborGridEntityGetter< GridEntityType, 3, StencilStorage > ThisType;
       static const int stencilSize = Config::getStencilSize();
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) >= CoordinatesType( 0, 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY ) = " << entity.getCoordinates()  + CoordinatesType( stepX, stepY, stepZ )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->entity.getMesh(), CoordinatesType( entity.getCoordinates().x() + stepX,
+         return NeighborGridEntityType( this->entity.getMesh(), CoordinatesType( entity.getCoordinates().x() + stepX,
                                                       entity.getCoordinates().y() + stepY,
                                                       entity.getCoordinates().z() + stepZ ) );
@@ -161,11 +152,8 @@ class NeighbourGridEntityGetter<
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) >= CoordinatesType( 0, 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) < entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY, stepZ ) = "
@@ -195,9 +183,9 @@ class NeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
+               neighborEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
@@ -207,10 +195,10 @@ class NeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilY[ index + stencilSize ] =
-                  entityIndex + index * neighbourEntityGetter.entity.getMesh().getDimensions().x();
+               neighborEntityGetter.stencilY[ index + stencilSize ] =
+                  entityIndex + index * neighborEntityGetter.entity.getMesh().getDimensions().x();
@@ -220,10 +208,10 @@ class NeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilZ[ index + stencilSize ] =
-                  entityIndex + index * neighbourEntityGetter.entity.getMesh().cellZNeighboursStep;
+               neighborEntityGetter.stencilZ[ index + stencilSize ] =
+                  entityIndex + index * neighborEntityGetter.entity.getMesh().cellZNeighborsStep;
@@ -248,12 +236,12 @@ class NeighbourGridEntityGetter<
       IndexType stencilY[ 2 * stencilSize + 1 ];
       IndexType stencilZ[ 2 * stencilSize + 1 ];
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       3         |              2            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -262,7 +250,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 3, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -270,35 +258,29 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 3;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       typedef typename GridEntityType::EntityOrientationType EntityOrientationType;
       typedef typename GridEntityType::EntityBasisType EntityBasisType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( ! stepX + ! stepY + ! stepZ == 2,
-                    std::cerr << "Only one of the steps can be non-zero: stepX = " << stepX
-                         << " stepY = " << stepY
-                         << " stepZ = " << stepZ );
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         static_assert( ! stepX + ! stepY + ! stepZ == 2, "Only one of the steps can be non-zero." );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX + ( stepX < 0 ),
                                         stepY + ( stepY < 0 ),
@@ -313,7 +295,7 @@ class NeighbourGridEntityGetter<
                    << entity.getCoordinates()  + CoordinatesType( stepX + ( stepX < 0 ), stepY + ( stepY < 0 ), stepZ + ( stepZ < 0 ) )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->entity.getMesh(),
+         return NeighborGridEntityType( this->entity.getMesh(),
                                          CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
                                                           entity.getCoordinates().y() + stepY + ( stepY < 0 ),
                                                           entity.getCoordinates().z() + stepZ + ( stepZ < 0 ) ),
@@ -337,12 +319,12 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
 /****      TODO: Finish it, knonw it is only a copy of specialization for none stored stencil
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       3         |              2            |       Cross       |
  * +-----------------+---------------------------+-------------------+
@@ -351,7 +333,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 3, Config >,
    GridEntityStencilStorageTag< GridEntityCrossStencil > >
@@ -359,35 +341,29 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 3;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       typedef typename GridEntityType::EntityOrientationType EntityOrientationType;
       typedef typename GridEntityType::EntityBasisType EntityBasisType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( ! stepX + ! stepY + ! stepZ == 2,
-                    std::cerr << "Only one of the steps can be non-zero: stepX = " << stepX
-                         << " stepY = " << stepY
-                         << " stepZ = " << stepZ );
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         static_assert( ! stepX + ! stepY + ! stepZ == 2, "Only one of the steps can be non-zero." );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX + ( stepX < 0 ),
                                         stepY + ( stepY < 0 ),
@@ -402,7 +378,7 @@ class NeighbourGridEntityGetter<
                    << entity.getCoordinates()  + CoordinatesType( stepX + ( stepX < 0 ), stepY + ( stepY < 0 ), stepZ + ( stepZ < 0 ) )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( this->entity.getMesh(),
+         return NeighborGridEntityType( this->entity.getMesh(),
                                          CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
                                                           entity.getCoordinates().y() + stepY + ( stepY < 0 ),
                                                           entity.getCoordinates().z() + stepZ + ( stepZ < 0 ) ),
@@ -426,13 +402,13 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       3         |              1            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -441,7 +417,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 3, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -449,35 +425,29 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 3;
-      static const int NeighbourEntityDimension = 1;
+      static const int NeighborEntityDimension = 1;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       typedef typename GridEntityType::EntityOrientationType EntityOrientationType;
       typedef typename GridEntityType::EntityBasisType EntityBasisType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( ! stepX + ! stepY + ! stepZ == 1,
-                    std::cerr << "Exactly two of the steps must be non-zero: stepX = " << stepX
-                         << " stepY = " << stepY
-                         << " stepZ = " << stepZ );
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         static_assert( ! stepX + ! stepY + ! stepZ == 1, "Exactly two of the steps must be non-zero." );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX + ( stepX < 0 ),
                                         stepY + ( stepY < 0 ),
@@ -492,7 +462,7 @@ class NeighbourGridEntityGetter<
                    << entity.getCoordinates()  + CoordinatesType( stepX + ( stepX < 0 ), stepY + ( stepY < 0 ), stepZ + ( stepZ < 0 ) )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
                                                       entity.getCoordinates().y() + stepY + ( stepY < 0 ),
                                                       entity.getCoordinates().z() + stepZ + ( stepZ < 0 ) ),
                                      EntityOrientationType( !!stepX, !!stepY, !!stepZ ),
@@ -513,13 +483,13 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       3         |            0              |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -528,7 +498,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 3, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -536,33 +506,30 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 3;
-      static const int NeighbourEntityDimension = 0;
+      static const int NeighborEntityDimension = 0;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY,int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
          TNL_ASSERT( stepX != 0 && stepY != 0 && stepZ != 0,
                     std::cerr << " stepX = " << stepX
                          << " stepY = " << stepY
                          << " stepZ = " << stepZ );
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX + ( stepX < 0 ),
                                         stepY + ( stepY < 0 ),
@@ -578,7 +545,7 @@ class NeighbourGridEntityGetter<
                    << " entity.getMesh().getDimensions() + CoordinatesType( sign( stepX ), sign( stepY ), sign( stepZ ) ) = "
                    << entity.getMesh().getDimensions()  + CoordinatesType( sign( stepX ), sign( stepY ), sign( stepZ ) )
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX + ( stepX < 0 ),
                                                       entity.getCoordinates().y() + stepY + ( stepY < 0 ),
                                                       entity.getCoordinates().z() + stepZ + ( stepZ < 0 ) ) );
@@ -597,12 +564,12 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       2         |              3            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -611,7 +578,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 2, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -619,23 +586,23 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 3;
+      static const int NeighborEntityDimension = 3;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
          /*TNL_ASSERT( ( ( !! stepX ) == ( !! entity.getOrientation().x() ) ) &&
                     ( ( !! stepY ) == ( !! entity.getOrientation().y() ) ) &&
@@ -643,11 +610,8 @@ class NeighbourGridEntityGetter<
                     std::cerr << "( stepX, stepY, stepZ ) cannot be perpendicular to entity coordinates: stepX = " << stepX
                          << " stepY = " << stepY << " stepZ = " << stepZ
                          << " entity.getOrientation() = " << entity.getOrientation() );*/
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() < entity.getMesh().getDimensions() + entity.getOrientation(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() + entity.getOrientation() = " << entity.getMesh().getDimensions() + entity.getOrientation()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LT( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() +
                        CoordinatesType( stepX - ( stepX > 0 ) * ( entity.getOrientation().x() != 0.0 ),
                                         stepY - ( stepY > 0 ) * ( entity.getOrientation().y() != 0.0 ),
@@ -663,7 +627,7 @@ class NeighbourGridEntityGetter<
                         stepZ + ( stepZ < 0 ) * ( entity.getOrientation().z() != 0.0 ) )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntityType( entity.getMesh(),
+         return NeighborGridEntityType( entity.getMesh(),
                                          CoordinatesType( entity.getCoordinates().x() + stepX - ( stepX > 0 ) * ( entity.getOrientation().x() != 0.0 ),
                                                           entity.getCoordinates().y() + stepY - ( stepY > 0 ) * ( entity.getOrientation().y() != 0.0 ),
                                                           entity.getCoordinates().z() + stepZ - ( stepZ > 0 ) * ( entity.getOrientation().z() != 0.0 ) ) );
@@ -683,12 +647,12 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stored Stencil   |
+ * | EntityDimenions | NeighborEntityDimension |  Stored Stencil   |
  * +-----------------+---------------------------+-------------------+
  * |       0         |              0            |       None        |
  * +-----------------+---------------------------+-------------------+
@@ -697,7 +661,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class NeighbourGridEntityGetter<
+class NeighborGridEntityGetter<
    GridEntity< Meshes::Grid< 3, Real, Device, Index >, 0, Config >,
    GridEntityStencilStorageTag< GridEntityNoStencil > >
@@ -705,36 +669,33 @@ class NeighbourGridEntityGetter<
       static const int EntityDimension = 0;
-      static const int NeighbourEntityDimension = 0;
+      static const int NeighborEntityDimension = 0;
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetterType;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetterType;
       __cuda_callable__ inline
-      NeighbourGridEntityGetter( const GridEntityType& entity )
+      NeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
       template< int stepX, int stepY, int stepZ >
       __cuda_callable__ inline
-      NeighbourGridEntityType getEntity() const
+      NeighborGridEntityType getEntity() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) >= CoordinatesType( 0, 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY, stepZ ) = "
                    << entity.getCoordinates()  + CoordinatesType( stepX, stepY, stepZ )
                    << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
                    << " EntityDimension = " << EntityDimension );
-         return NeighbourGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX,
+         return NeighborGridEntity( CoordinatesType( entity.getCoordinates().x() + stepX,
                                                       entity.getCoordinates().y() + stepY,
                                                       entity.getCoordinates().z() + stepZ ) );
@@ -743,11 +704,8 @@ class NeighbourGridEntityGetter<
       __cuda_callable__ inline
       IndexType getEntityIndex() const
-         TNL_ASSERT( entity.getCoordinates() >= CoordinatesType( 0, 0, 0 ) &&
-                    entity.getCoordinates() <= entity.getMesh().getDimensions(),
-              std::cerr << "entity.getCoordinates() = " << entity.getCoordinates()
-                   << " entity.getMesh().getDimensions() = " << entity.getMesh().getDimensions()
-                   << " EntityDimension = " << EntityDimension );
+         TNL_ASSERT_GE( entity.getCoordinates(), CoordinatesType( 0, 0, 0 ), "wrong coordinates" );
+         TNL_ASSERT_LE( entity.getCoordinates(), entity.getMesh().getDimensions(), "wrong coordinates" );
          TNL_ASSERT( entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) >= CoordinatesType( 0, 0, 0 ) &&
                     entity.getCoordinates() + CoordinatesType( stepX, stepY, stepZ ) <= entity.getMesh().getDimensions(),
               std::cerr << "entity.getCoordinates()  + CoordinatesType( stepX, stepY, stepZ ) = "
@@ -764,10 +722,9 @@ class NeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //NeighbourGridEntityGetter(){};
+      //NeighborGridEntityGetter(){};
 } // namespace Meshes
-} // namespace TNL
+} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Meshes/GridDetails/NeighbourGridEntitiesStorage.h b/src/TNL/Meshes/GridDetails/NeighbourGridEntitiesStorage.h
deleted file mode 100644
index ec28a6fffd7ee33933b7d3b8897ab41d110d46b1..0000000000000000000000000000000000000000
--- a/src/TNL/Meshes/GridDetails/NeighbourGridEntitiesStorage.h
+++ /dev/null
@@ -1,173 +0,0 @@
-                          NeighbourGridEntitiesStorage.h  -  description
-                             -------------------
-    begin                : Dec 18, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#pragma once
-#include <TNL/Devices/Cuda.h>
-#include <TNL/Meshes/MeshDimensionTag.h>
-#include <TNL/Meshes/GridEntityConfig.h>
-#include <TNL/Meshes/GridDetails/NeighbourGridEntityGetter.h>
-namespace TNL {
-namespace Meshes {
-template< typename GridEntity,
-          int NeighbourEntityDimension,
-          typename GridEntityConfig,
-          bool storage = GridEntityConfig::template neighbourEntityStorage< GridEntity >( NeighbourEntityDimension ) >
-class NeighbourGridEntityLayer{};   
-template< typename GridEntity,
-          int NeighbourEntityDimension,
-          typename GridEntityConfig >
-class NeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension, GridEntityConfig, true >
-: public NeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension - 1, GridEntityConfig >
-   public:
-      typedef NeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension - 1, GridEntityConfig > BaseType;
-      typedef NeighbourGridEntityGetter< GridEntity, NeighbourEntityDimension > NeighbourEntityGetterType;
-      using BaseType::getNeighbourEntities;
-      __cuda_callable__
-      NeighbourGridEntityLayer( const GridEntity& entity )
-      : BaseType( entity ),
-        neighbourEntities( entity )
-      {}
-      __cuda_callable__
-      const NeighbourEntityGetterType& getNeighbourEntities( const MeshDimensionTag< NeighbourEntityDimension>& tag ) const
-      {
-         return this->neighbourEntities;
-      }
-      __cuda_callable__
-      void refresh( const typename GridEntity::GridType& grid,
-                    const typename GridEntity::GridType::IndexType& entityIndex )
-      {
-         BaseType::refresh( grid, entityIndex );
-         neighbourEntities.refresh( grid, entityIndex );
-      }
-   protected:
-      NeighbourEntityGetterType neighbourEntities;
-template< typename GridEntity,
-          typename GridEntityConfig >
-class NeighbourGridEntityLayer< GridEntity, 0, GridEntityConfig, true >
-   public:
-      typedef NeighbourGridEntityGetter< GridEntity, 0 > NeighbourEntityGetterType;
-      __cuda_callable__
-      NeighbourGridEntityLayer( const GridEntity& entity )
-      : neighbourEntities( entity )
-      {}
-      __cuda_callable__
-      const NeighbourEntityGetterType& getNeighbourEntities( const MeshDimensionTag< 0 >& tag ) const
-      {
-         return this->neighbourEntities;
-      }
-      __cuda_callable__
-      void refresh( const typename GridEntity::GridType& grid,
-                    const typename GridEntity::GridType::IndexType& entityIndex )
-      {
-         neighbourEntities.refresh( grid, entityIndex );
-      }
-   protected:
-      NeighbourEntityGetterType neighbourEntities;
-template< typename GridEntity,
-          int NeighbourEntityDimension,
-          typename GridEntityConfig >
-class NeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension, GridEntityConfig, false >
-: public NeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension - 1, GridEntityConfig >
-   public:
-      typedef NeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension - 1, GridEntityConfig > BaseType;      
-      typedef NeighbourGridEntityGetter< GridEntity, NeighbourEntityDimension > NeighbourEntityGetterType;
-      using BaseType::getNeighbourEntities;
-      __cuda_callable__
-      NeighbourGridEntityLayer( const GridEntity& entity )
-      : BaseType( entity )
-      {}
-      __cuda_callable__
-      const NeighbourEntityGetterType& getNeighbourEntities( const MeshDimensionTag< NeighbourEntityDimension >& tag ) const {}
-      __cuda_callable__
-      void refresh( const typename GridEntity::GridType& grid,
-                    const typename GridEntity::GridType::IndexType& entityIndex ) {}
-template< typename GridEntity,
-          typename GridEntityConfig >
-class NeighbourGridEntityLayer< GridEntity, 0, GridEntityConfig, false >
-   public:
-      typedef NeighbourGridEntityGetter< GridEntity, 0 > NeighbourEntityGetterType;
-      __cuda_callable__
-      NeighbourGridEntityLayer( const GridEntity& entity ){}
-      __cuda_callable__
-      const NeighbourEntityGetterType& getNeighbourEntities( const MeshDimensionTag< 0 >& tag ) const {}
-      __cuda_callable__
-      void refresh( const typename GridEntity::GridType& grid,
-                    const typename GridEntity::GridType::IndexType& entityIndex ) {}
-template< typename GridEntity,
-          typename GridEntityConfig >
-class NeighbourGridEntitiesStorage
-: public NeighbourGridEntityLayer< GridEntity, GridEntity::meshDimension, GridEntityConfig >
-   typedef NeighbourGridEntityLayer< GridEntity, GridEntity::meshDimension, GridEntityConfig > BaseType;
-   public:
-      using BaseType::getNeighbourEntities;
-      using BaseType::refresh;
-      __cuda_callable__
-      NeighbourGridEntitiesStorage( const GridEntity& entity )
-      : BaseType( entity )
-      {}
-      template< int EntityDimension >
-      __cuda_callable__
-      const NeighbourGridEntityGetter< GridEntity, EntityDimension >&
-      getNeighbourEntities() const
-      {
-         return BaseType::getNeighbourEntities( MeshDimensionTag< EntityDimension >() );
-      }
-} // namespace Meshes
-} // namespace TNL
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h b/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h
index 400f71531a5f6f5d92606d9da52b991839098b94..a9310fd1e54faa1e417086f4c5f85765be4b6974 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid1D.h
@@ -25,25 +25,22 @@ class Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
@@ -57,25 +54,22 @@ class Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >
       typedef Meshes::Grid< 1, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h
index 6fcc348b509bff617b4739f7dfd02cac2f750651..c6102d04c574a32aa4023f5a0ccd0630528db5f7 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid1D_impl.h
@@ -29,12 +29,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary cells
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimensions." );
    auto distributedgrid=gridPointer->GetDistGrid();
@@ -80,12 +80,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior cells
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    auto distributedgrid=gridPointer->GetDistGrid();
@@ -132,13 +132,13 @@ void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 1 >::
    const GridPointer& gridPointer,
-   SharedPointer< UserData, DeviceType >& userDataPointer ) const
+   SharedPointer< UserData, Device >& userDataPointer ) const
     * All cells
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimensions." );
    auto distributedgrid=gridPointer->GetDistGrid();
@@ -187,12 +187,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true >(
@@ -210,12 +210,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
@@ -234,12 +234,12 @@ void
 Traverser< Meshes::Grid< 1, Real, Device, Index >, GridEntity, 0 >::
    const GridPointer& gridPointer,
-   SharedPointer< UserData, DeviceType >& userDataPointer ) const
+   SharedPointer< UserData, Device >& userDataPointer ) const
     * All vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h b/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h
index 6123ce9d1a712a916775c45fe3d856e285f8196c..d80205feede8fb2e08cbcd4e92fe91d3c852904f 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid2D.h
@@ -25,24 +25,21 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
@@ -55,25 +52,22 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
@@ -86,25 +80,22 @@ class Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h
index 544076f8328c301347813c81e169f879d36e79d0..8cdc9eafe673588965ba2cdf2f7bb4fa3a144188 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid2D_impl.h
@@ -27,12 +27,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary cells
-   static_assert( GridEntity::entityDimension == 2, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
    auto distributedgrid=gridPointer->GetDistGrid();
@@ -101,13 +101,13 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior cells
-   static_assert( GridEntity::entityDimension == 2, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimensions." );
    auto distributedgrid=gridPointer->GetDistGrid();
@@ -166,12 +166,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 2 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All cells
-   static_assert( GridEntity::entityDimension == 2, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
    auto distributedgrid=gridPointer->GetDistGrid();
@@ -233,12 +233,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary faces
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 0, CoordinatesType, CoordinatesType >(
@@ -268,12 +268,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior faces
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false, 1, 1, CoordinatesType, CoordinatesType >(
@@ -303,12 +303,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 1 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All faces
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false, 1, 1, CoordinatesType, CoordinatesType >(
@@ -338,12 +338,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 1 >(
@@ -362,12 +362,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
@@ -386,12 +386,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 2, Real, Device, Index >, GridEntity, 0 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h b/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h
index a03503e302bb346cf096d893859fe61fcb4de24d..705e4bbc033ca36708306894ec15186074d9460a 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid3D.h
@@ -25,24 +25,21 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
@@ -55,24 +52,21 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
@@ -85,25 +79,22 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
@@ -116,25 +107,22 @@ class Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >
       typedef Meshes::Grid< 3, Real, Device, Index > GridType;
       typedef SharedPointer< GridType > GridPointer;
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processBoundaryEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processInteriorEntities( const GridPointer& gridPointer,
-                                    SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                                    SharedPointer< UserData, Device >& userDataPointer ) const;
       template< typename UserData,
                 typename EntitiesProcessor >
       void processAllEntities( const GridPointer& gridPointer,
-                               SharedPointer< UserData, DeviceType >& userDataPointer ) const;
+                               SharedPointer< UserData, Device >& userDataPointer ) const;
 } // namespace Meshes
diff --git a/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h
index 5f98a59d55855099162f70718ef4d75f3e8f9107..84e44ef17c29aca27da6c940e6ab29841fb9aa86 100644
--- a/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Traverser_Grid3D_impl.h
@@ -27,12 +27,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary cells
-   static_assert( GridEntity::entityDimension == 3, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 3, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 1, 1 >(
@@ -51,12 +51,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior cells
-   static_assert( GridEntity::entityDimension == 3, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 3, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
@@ -75,12 +75,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 3 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All cells
-   static_assert( GridEntity::entityDimension == 3, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 3, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
@@ -102,12 +102,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary faces
-   static_assert( GridEntity::entityDimension == 2, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 0, 0, CoordinatesType, CoordinatesType >(
@@ -146,12 +146,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior faces
-   static_assert( GridEntity::entityDimension == 2, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false, 1, 1, 1, CoordinatesType, CoordinatesType >(
@@ -190,12 +190,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 2 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All faces
-   static_assert( GridEntity::entityDimension == 2, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 2, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false, 1, 1, 1, CoordinatesType, CoordinatesType >(
       CoordinatesType( 0, 0, 0 ),
@@ -236,12 +236,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary edges
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 0, 1, 1, CoordinatesType, CoordinatesType >(
@@ -280,12 +280,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior edges
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false, 1, 1, 1, CoordinatesType, CoordinatesType >(
@@ -324,12 +324,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 1 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All edges
-   static_assert( GridEntity::entityDimension == 1, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 1, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false, 1, 1, 1, CoordinatesType, CoordinatesType >(
       CoordinatesType( 0, 0, 0 ),
@@ -370,12 +370,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >::
 processBoundaryEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Boundary vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, true, 1, 1, 1 >(
@@ -394,12 +394,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >::
 processInteriorEntities( const GridPointer& gridPointer,
-                         SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                         SharedPointer< UserData, Device >& userDataPointer ) const
     * Interior vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
@@ -418,12 +418,12 @@ template< typename Real,
 Traverser< Meshes::Grid< 3, Real, Device, Index >, GridEntity, 0 >::
 processAllEntities( const GridPointer& gridPointer,
-                    SharedPointer< UserData, DeviceType >& userDataPointer ) const
+                    SharedPointer< UserData, Device >& userDataPointer ) const
     * All vertices
-   static_assert( GridEntity::entityDimension == 0, "The entity has wrong dimensions." );
+   static_assert( GridEntity::getEntityDimension() == 0, "The entity has wrong dimension." );
    GridTraverser< GridType >::template processEntities< GridEntity, EntitiesProcessor, UserData, false >(
diff --git a/src/TNL/Meshes/GridEntity.h b/src/TNL/Meshes/GridEntity.h
index dee16543008d5ac79551403d2c7cd210527756e4..4db50173285c267e61736a36827073dcb323a002 100644
--- a/src/TNL/Meshes/GridEntity.h
+++ b/src/TNL/Meshes/GridEntity.h
@@ -10,15 +10,15 @@
 #pragma once
-#include <TNL/Meshes/GridDetails/NeighbourGridEntitiesStorage.h>
+#include <TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h>
 namespace TNL {
 namespace Meshes {
 template< typename GridEntity,
-          int NeighbourEntityDimension,
+          int NeighborEntityDimension,
           typename StencilStorage >
-class NeighbourGridEntityGetter;
+class NeighborGridEntityGetter;
 template< typename GridEntityType >
 class BoundaryGridEntityChecker;
@@ -51,28 +51,24 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       typedef typename GridType::CoordinatesType CoordinatesType;
       typedef Config ConfigType;
-      static const int meshDimension = GridType::meshDimension;
+      constexpr static int getMeshDimension() { return GridType::getMeshDimension(); };            
-      static const int entityDimension = EntityDimension;
+      constexpr static int getEntityDimension() { return EntityDimension; };
-      constexpr static int getDimension() { return EntityDimension; };
-      constexpr static int getMeshDimension() { return meshDimension; };            
-      typedef Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
-      typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
-      typedef GridEntity< GridType, entityDimension, Config > ThisType;
+      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
+      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
+      typedef GridEntity< GridType, EntityDimension, Config > ThisType;
       typedef typename GridType::PointType PointType;
-      typedef NeighbourGridEntitiesStorage< ThisType, Config > NeighbourGridEntitiesStorageType;
+      typedef NeighborGridEntitiesStorage< ThisType, Config > NeighborGridEntitiesStorageType;
-      template< int NeighbourEntityDimension = entityDimension >
-      using NeighbourEntities =
-         NeighbourGridEntityGetter<
+      template< int NeighborEntityDimension = getEntityDimension() >
+      using NeighborEntities =
+         NeighborGridEntityGetter<
             GridEntity< Meshes::Grid< Dimension, Real, Device, Index >,
                            Config >,
-            NeighbourEntityDimension >;
+            NeighborEntityDimension >;
       __cuda_callable__ inline
       GridEntity( const GridType& grid );
@@ -116,10 +112,10 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       __cuda_callable__ inline
       void setBasis( const EntityBasisType& basis );
-      template< int NeighbourEntityDimension = entityDimension >
+      template< int NeighborEntityDimension = getEntityDimension() >
       __cuda_callable__ inline
-      const NeighbourEntities< NeighbourEntityDimension >&
-      getNeighbourEntities() const;
+      const NeighborEntities< NeighborEntityDimension >&
+      getNeighborEntities() const;
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
@@ -145,7 +141,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, EntityDimensio
       EntityBasisType basis;
-      NeighbourGridEntitiesStorageType neighbourEntitiesStorage;
+      NeighborGridEntitiesStorageType neighborEntitiesStorage;
       //__cuda_callable__ inline
@@ -175,27 +171,23 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       typedef typename GridType::PointType PointType;
       typedef Config ConfigType;
-      static const int meshDimension = GridType::meshDimension;
-      static const int entityDimension = meshDimension;
-      constexpr static int getDimension() { return entityDimension; };
+      constexpr static int getMeshDimension() { return GridType::getMeshDimension(); };
-      constexpr static int getMeshDimension() { return meshDimension; };
+      constexpr static int getEntityDimension() { return getMeshDimension(); };
-      typedef Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
-      typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
-      typedef GridEntity< GridType, entityDimension, Config > ThisType;
-      typedef NeighbourGridEntitiesStorage< ThisType, Config > NeighbourGridEntitiesStorageType;
+      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
+      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
+      typedef GridEntity< GridType, Dimension, Config > ThisType;
+      typedef NeighborGridEntitiesStorage< ThisType, Config > NeighborGridEntitiesStorageType;
-      template< int NeighbourEntityDimension = entityDimension >
-      using NeighbourEntities =
-         NeighbourGridEntityGetter<
+      template< int NeighborEntityDimension = getEntityDimension() >
+      using NeighborEntities =
+         NeighborGridEntityGetter<
             GridEntity< Meshes::Grid< Dimension, Real, Device, Index >,
-                           entityDimension,
+                           Dimension,
                            Config >,
-            NeighbourEntityDimension >;
+            NeighborEntityDimension >;
       __cuda_callable__ inline
@@ -240,10 +232,10 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       __cuda_callable__ inline
       void setBasis( const EntityBasisType& basis ){};
-      template< int NeighbourEntityDimension = Dimension >
+      template< int NeighborEntityDimension = Dimension >
       __cuda_callable__ inline
-      const NeighbourEntities< NeighbourEntityDimension >&
-      getNeighbourEntities() const;
+      const NeighborEntities< NeighborEntityDimension >&
+      getNeighborEntities() const;
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
@@ -268,7 +260,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension, Con
       CoordinatesType coordinates;
-      NeighbourGridEntitiesStorageType neighbourEntitiesStorage;
+      NeighborGridEntitiesStorageType neighborEntitiesStorage;
       //__cuda_callable__ inline
@@ -298,26 +290,22 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       typedef typename GridType::PointType PointType;
       typedef Config ConfigType;
-      static const int meshDimension = GridType::meshDimension;
-      static const int entityDimension = 0;
-      constexpr static int getDimension() { return entityDimension; };
+      constexpr static int getMeshDimension() { return GridType::getMeshDimension(); };
-      constexpr static int getMeshDimension() { return meshDimension; };
+      constexpr static int getEntityDimension() { return 0; };
-      typedef Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
-      typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
-      typedef GridEntity< GridType, entityDimension, Config > ThisType;
-      typedef NeighbourGridEntitiesStorage< ThisType, Config > NeighbourGridEntitiesStorageType;
+      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityOrientationType;
+      typedef Containers::StaticVector< getMeshDimension(), IndexType > EntityBasisType;
+      typedef GridEntity< GridType, 0, Config > ThisType;
+      typedef NeighborGridEntitiesStorage< ThisType, Config > NeighborGridEntitiesStorageType;
-      template< int NeighbourEntityDimension = entityDimension >
-      using NeighbourEntities =
-         NeighbourGridEntityGetter<
+      template< int NeighborEntityDimension = getEntityDimension() >
+      using NeighborEntities =
+         NeighborGridEntityGetter<
             GridEntity< Meshes::Grid< Dimension, Real, Device, Index >,
-                           entityDimension,
+                           0,
                            Config >,
-            NeighbourEntityDimension >;
+            NeighborEntityDimension >;
       __cuda_callable__ inline
@@ -363,10 +351,10 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       void setBasis( const EntityBasisType& basis ){};
-      template< int NeighbourEntityDimension = entityDimension >
+      template< int NeighborEntityDimension = getEntityDimension() >
       __cuda_callable__ inline
-      const NeighbourEntities< NeighbourEntityDimension >&
-      getNeighbourEntities() const;
+      const NeighborEntities< NeighborEntityDimension >&
+      getNeighborEntities() const;
       __cuda_callable__ inline
       bool isBoundaryEntity() const;
@@ -391,7 +379,7 @@ class GridEntity< Meshes::Grid< Dimension, Real, Device, Index >, 0, Config >
       CoordinatesType coordinates;
-      NeighbourGridEntitiesStorageType neighbourEntitiesStorage;
+      NeighborGridEntitiesStorageType neighborEntitiesStorage;
       friend class BoundaryGridEntityChecker< ThisType >;
diff --git a/src/TNL/Meshes/GridEntityConfig.h b/src/TNL/Meshes/GridEntityConfig.h
index e382bdbba82827dc9e3d39760bd6d92f619b7129..a056910a37021c04c383a6673021cb8a072052b6 100644
--- a/src/TNL/Meshes/GridEntityConfig.h
+++ b/src/TNL/Meshes/GridEntityConfig.h
@@ -29,9 +29,9 @@ class GridEntityStencilStorageTag
- * This class says what neighbour grid entity indexes shall be pre-computed and stored in the
- * grid entity structure. If neighbourEntityStorage() returns false, nothing is stored.
- * Otherwise, if neighbour entity storage is enabled, we may store either only neighbour entities in a cross like this
+ * This class says what neighbor grid entity indexes shall be pre-computed and stored in the
+ * grid entity structure. If neighborEntityStorage() returns false, nothing is stored.
+ * Otherwise, if neighbor entity storage is enabled, we may store either only neighbor entities in a cross like this
  *                X
  *   X            X
@@ -39,7 +39,7 @@ class GridEntityStencilStorageTag
  *   X            X
  *                X
- * or all neighbour entities like this
+ * or all neighbor entities like this
  *           XXXXX
  *  XXX      XXXXX
@@ -53,7 +53,7 @@ class GridEntityNoStencilStorage
       template< typename GridEntity >
-      constexpr static bool neighbourEntityStorage( int neighbourEntityStorage )
+      constexpr static bool neighborEntityStorage( int neighborEntityStorage )
          return false;
@@ -70,12 +70,10 @@ class GridEntityCrossStencilStorage
       template< typename GridEntity >
-      constexpr static bool neighbourEntityStorage( const int neighbourEntityDimension )
+      constexpr static bool neighborEntityStorage( const int neighborEntityDimension )
-         return ( GridEntity::entityDimension == GridEntity::GridType::meshDimension &&
-                  neighbourEntityDimension == GridEntity::GridType::meshDimension )
-               // FIXME: how is GridEntityCrossStencil cast to int?
-                * GridEntityCrossStencil;
+         return ( GridEntity::getEntityDimension() == GridEntity::GridType::getMeshDimension() &&
+                  neighborEntityDimension == GridEntity::GridType::getMeshDimension() );
       constexpr static int getStencilSize()
@@ -86,4 +84,3 @@ class GridEntityCrossStencilStorage
 } // namespace Meshes
 } // namespace TNL
diff --git a/src/TNL/Meshes/MeshDetails/CMakeLists.txt b/src/TNL/Meshes/MeshDetails/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Meshes/MeshDetails/config/CMakeLists.txt b/src/TNL/Meshes/MeshDetails/config/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Meshes/MeshDetails/layers/CMakeLists.txt b/src/TNL/Meshes/MeshDetails/layers/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Meshes/MeshDetails/traits/CMakeLists.txt b/src/TNL/Meshes/MeshDetails/traits/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Meshes/Topologies/CMakeLists.txt b/src/TNL/Meshes/Topologies/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Meshes/Traverser.h b/src/TNL/Meshes/Traverser.h
index 206227e9b360a2beaf5fccb8ce4ec26aadec375e..c9c647cd7314d800c4a56739945284530945b575 100644
--- a/src/TNL/Meshes/Traverser.h
+++ b/src/TNL/Meshes/Traverser.h
@@ -15,7 +15,7 @@ namespace Meshes {
 template< typename Mesh,
           typename MeshEntity,
-          int EntitiesDimension = MeshEntity::entityDimension >
+          int EntitiesDimension = MeshEntity::getEntityDimension() >
 class Traverser{};
 } // namespace Meshes
@@ -23,4 +23,4 @@ class Traverser{};
 #include <TNL/Meshes/GridDetails/Traverser_Grid1D.h>
 #include <TNL/Meshes/GridDetails/Traverser_Grid2D.h>
-#include <TNL/Meshes/GridDetails/Traverser_Grid3D.h>
\ No newline at end of file
+#include <TNL/Meshes/GridDetails/Traverser_Grid3D.h>
diff --git a/src/TNL/Object.cpp b/src/TNL/Object.cpp
index 14418d26da1412d8f3785fce4f0c76fb0391e948..b4f364139de556ce410526f1bf94e1e12b4d8e41 100644
--- a/src/TNL/Object.cpp
+++ b/src/TNL/Object.cpp
@@ -46,11 +46,7 @@ String Object :: getSerializationTypeVirtual() const
 bool Object :: save( File& file ) const
-#ifdef HAVE_NOT_CXX11
-   if( ! file. write< const char, Devices::Host, int >( magic_number, strlen( magic_number ) ) )
    if( ! file. write( magic_number, strlen( magic_number ) ) )
       return false;
    if( ! this->getSerializationTypeVirtual().save( file ) ) return false;
    return true;
@@ -80,55 +76,34 @@ bool Object :: boundLoad( File& file )
 bool Object :: save( const String& fileName ) const
    File file;
-   if( ! file. open( fileName, tnlWriteMode ) )
+   if( ! file. open( fileName, IOMode::write ) )
-      std::cerr << "I am not bale to open the file " << fileName << " for writing." << std::endl;
+      std::cerr << "I am not able to open the file " << fileName << " for writing." << std::endl;
       return false;
-   if( ! this->save( file ) )
-      return false;
-   if( ! file. close() )
-   {
-      std::cerr << "An error occurred when I was closing the file " << fileName << "." << std::endl;
-      return false;
-   }
-   return true;
+   return this->save( file );
 bool Object :: load( const String& fileName )
    File file;
-   if( ! file. open( fileName, tnlReadMode ) )
+   if( ! file. open( fileName, IOMode::read ) )
-      std::cerr << "I am not bale to open the file " << fileName << " for reading." << std::endl;
+      std::cerr << "I am not able to open the file " << fileName << " for reading." << std::endl;
       return false;
-   if( ! this->load( file ) )
-      return false;
-   if( ! file. close() )
-   {
-      std::cerr << "An error occurred when I was closing the file " << fileName << "." << std::endl;
-      return false;
-   }
-   return true;
+   return this->load( file );
 bool Object :: boundLoad( const String& fileName )
    File file;
-   if( ! file. open( fileName, tnlReadMode ) )
+   if( ! file. open( fileName, IOMode::read ) )
-      std::cerr << "I am not bale to open the file " << fileName << " for reading." << std::endl;
+      std::cerr << "I am not able to open the file " << fileName << " for reading." << std::endl;
       return false;
-   if( ! this->boundLoad( file ) )
-      return false;
-   if( ! file. close() )
-   {
-      std::cerr << "An error occurred when I was closing the file " << fileName << "." << std::endl;
-      return false;
-   }
-   return true;
+   return this->boundLoad( file );
 void Object::setDeprecatedReadMode()
@@ -140,32 +115,34 @@ void Object::setDeprecatedReadMode()
 bool getObjectType( File& file, String& type )
    char mn[ 10 ];
-#ifdef HAVE_NOT_CXX11
-   if( ! file. read< char, Devices::Host, int >( mn, strlen( magic_number ) ) )
    if( ! file. read( mn, strlen( magic_number ) ) )
       std::cerr << "Unable to read file " << file. getFileName() << " ... " << std::endl;
       return false;
    if( strncmp( mn, magic_number, 5 ) != 0 &&
-       strncmp( mn, "SIM33", 5 ) != 0 ) return false;
-   if( ! type. load( file ) ) return false;
+       strncmp( mn, "SIM33", 5 ) != 0 )
+   {
+       std::cout << "Not a TNL file (wrong magic number)." << std::endl;
+       return false;
+   }
+   if( ! type. load( file ) )
+   {
+       std::cerr << "Cannot load the object type." << std::endl;
+       return false;
+   }
    return true;
 bool getObjectType( const String& fileName, String& type )
    File binaryFile;
-   if( ! binaryFile. open( fileName, tnlReadMode ) )
+   if( ! binaryFile. open( fileName, IOMode::read ) )
       std::cerr << "I am not able to open the file " << fileName << " for detecting the object inside!" << std::endl;
       return false;
-   bool ret_val = getObjectType( binaryFile, type );
-   binaryFile. close();
-   return ret_val;
+   return getObjectType( binaryFile, type );
 bool parseObjectType( const String& objectType,
diff --git a/src/TNL/Object.h b/src/TNL/Object.h
index ed0ac90de971773bf52b01bb8715b8f43e425908..932361354be5778e661e140ed2dfa72145520447 100644
--- a/src/TNL/Object.h
+++ b/src/TNL/Object.h
@@ -10,7 +10,7 @@
 #pragma once
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/CudaCallable.h>
 #include <TNL/String.h>
 #include <TNL/File.h>
 #include <TNL/Containers/List.h>
@@ -33,8 +33,10 @@ class Object
       //! Basic constructor
+#ifndef HAVE_MIC
       Object() : deprecatedReadMode( false ) {};
        * Type getter. This returns the type in C++ style - for example the returned value
@@ -75,8 +77,9 @@ class Object
       // FIXME: __cuda_callable__ would have to be added to every overriding destructor,
       // even if the object's constructor is not __cuda_callable__
       //   __cuda_callable__
+#ifndef HAVE_MIC
       virtual ~Object(){};
diff --git a/src/TNL/Operators/Advection/CMakeLists.txt b/src/TNL/Operators/Advection/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/Advection/LaxFridrichs.h b/src/TNL/Operators/Advection/LaxFridrichs.h
index f72e7fbd3802805adb556dfc338df92811a3c5e5..9dff03402a5bfef768e8ca608bd8dbbd10a38998 100644
--- a/src/TNL/Operators/Advection/LaxFridrichs.h
+++ b/src/TNL/Operators/Advection/LaxFridrichs.h
@@ -89,14 +89,14 @@ class LaxFridrichs< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1 >(); 
          const IndexType& center = entity.getIndex(); 
-         const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); 
+         const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >(); 
          typedef Functions::FunctionAdapter< MeshType, VelocityFunctionType > FunctionAdapter;
          return ( 0.5 / this->tau ) * this->artificialViscosity * ( u[ west ]- 2.0 * u[ center ] + u[ east ] ) -
                 FunctionAdapter::getValue( this->velocityField.template getData< DeviceType >()[ 0 ], entity, time ) * ( u[ east ] - u[ west ] ) * hxInverse * 0.5;
@@ -181,18 +181,18 @@ class LaxFridrichs< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1, 0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts< 0, -1 >(); 
          const IndexType& center = entity.getIndex();
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >(); 
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >(); 
          typedef Functions::FunctionAdapter< MeshType, VelocityFunctionType > FunctionAdapter;
          return ( 0.25 / this->tau ) * this->artificialViscosity * ( u[ west ] + u[ east ] + u[ north ] + u[ south ] - 4.0 * u[ center ] ) -
@@ -279,20 +279,20 @@ class LaxFridrichs< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimensions." ); 
+         static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." ); 
          static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
          const RealType& hxInverse = entity.getMesh().template getSpaceStepsProducts< -1,  0,  0 >(); 
          const RealType& hyInverse = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >(); 
          const RealType& hzInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >(); 
          const IndexType& center = entity.getIndex();
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >(); 
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >(); 
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >(); 
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >(); 
          typedef Functions::FunctionAdapter< MeshType, VelocityFunctionType > FunctionAdapter;
          return ( 0.25 / this->tau ) * this->artificialViscosity * ( u[ west ] + u[ east ] + u[ north ] + u[ south ] + u[ up ] + u[ down ] - 6.0 * u[ center ] ) -
diff --git a/src/TNL/Operators/Analytic/CMakeLists.txt b/src/TNL/Operators/Analytic/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/Analytic/Identity.h b/src/TNL/Operators/Analytic/Identity.h
index b8eb3f9a92901e8384370de6f49b66c226954570..b0c21a57ea0fd49b35067c6614488bb8e7177931 100644
--- a/src/TNL/Operators/Analytic/Identity.h
+++ b/src/TNL/Operators/Analytic/Identity.h
@@ -52,11 +52,11 @@ class Identity : public Functions::Domain< Dimensions, Functions::SpaceDomain >
                                      const PointType& vertex,
                                      const RealType& time = 0 ) const
-         return function.getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
+         return function.template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time );
 } // namespace Analytic
 } // namespace Operators
-} // namespace TNL
\ No newline at end of file
+} // namespace TNL
diff --git a/src/TNL/Operators/CMakeLists.txt b/src/TNL/Operators/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/DirichletBoundaryConditions.h b/src/TNL/Operators/DirichletBoundaryConditions.h
index bd2d54739250d80bf493aace2d22e267cf11adc6..759db071cf8af223abedc6899e1787f45ab76ee3 100644
--- a/src/TNL/Operators/DirichletBoundaryConditions.h
+++ b/src/TNL/Operators/DirichletBoundaryConditions.h
@@ -22,7 +22,7 @@ template< typename Mesh,
           typename Function = Functions::Analytic::Constant< Mesh::getMeshDimension(), typename Mesh::RealType >,
           int MeshEntitiesDimension = Mesh::getMeshDimension(),
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class DirichletBoundaryConditions
 : public Operator< Mesh,
@@ -43,7 +43,7 @@ class DirichletBoundaryConditions
       typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
       typedef typename MeshType::PointType PointType;
-      static constexpr int getDimension() { return MeshType::meshDimension; }
+      static constexpr int getMeshDimension() { return MeshType::getMeshDimension(); }
       static void configSetup( Config::ConfigDescription& config,
                                const String& prefix = "" )
diff --git a/src/TNL/Operators/FiniteDifferences.h b/src/TNL/Operators/FiniteDifferences.h
index 36b8a3948bc9c3b1534258d741eb03d193dcf940..24ca76f82ee9009d6dc7e98f62b6a96340130f59 100644
--- a/src/TNL/Operators/FiniteDifferences.h
+++ b/src/TNL/Operators/FiniteDifferences.h
@@ -33,18 +33,6 @@ class FiniteDifferences< Meshes::Grid< 1, Real, Device, Index > >
    typedef typename GridType::Cell CellType;
-#ifdef HAVE_NOT_CXX11
-   template< typename GridFunction,
-             int XDifferenceOrder,
-             int YDifferenceOrder,
-             int ZDifferenceOrder,
-             int XDifferenceDirection,
-             int YDifferenceDirection,
-             int ZDifferenceDirection >
-   static RealType getDifference( const GridType& grid,
-                                  const GridFunction& inFunction,
-                                  GridFunction& outFunction );
    template< typename GridFunction,
              int XDifferenceOrder,
              int YDifferenceOrder,
@@ -55,20 +43,7 @@ class FiniteDifferences< Meshes::Grid< 1, Real, Device, Index > >
    static RealType getDifference( const GridType& grid,
                                   const GridFunction& inFunction,
                                   GridFunction& outFunction );
-#ifdef HAVE_NOT_CXX11
-   template< typename GridFunction,
-             int XDifferenceOrder,
-             int YDifferenceOrder,
-             int ZDifferenceOrder,
-             int XDifferenceDirection,
-             int YDifferenceDirection,
-             int ZDifferenceDirection >
-   static RealType getDifference( const GridType& grid,
-                                  const CellType& cell,
-                                  const GridFunction& function );
    template< typename GridFunction,
              int XDifferenceOrder,
              int YDifferenceOrder,
@@ -79,8 +54,6 @@ class FiniteDifferences< Meshes::Grid< 1, Real, Device, Index > >
    static RealType getDifference( const GridType& grid,
                                   const CellType& cell,
                                   const GridFunction& function );
 template< typename Real, typename Device, typename Index >
@@ -96,18 +69,6 @@ class FiniteDifferences< Meshes::Grid< 2, Real, Device, Index > >
    typedef typename GridType::Cell CellType;
-#ifdef HAVE_NOT_CXX11
-   template< typename GridFunction,
-             int XDifferenceOrder,
-             int YDifferenceOrder,
-             int ZDifferenceOrder,
-             int XDifferenceDirection,
-             int YDifferenceDirection,
-             int ZDifferenceDirection >
-   static RealType getDifference( const GridType& grid,
-                                  const GridFunction& inFunction,
-                                  GridFunction& outFunction );
    template< typename GridFunction,
              int XDifferenceOrder,
              int YDifferenceOrder,
@@ -118,20 +79,7 @@ class FiniteDifferences< Meshes::Grid< 2, Real, Device, Index > >
    static RealType getDifference( const GridType& grid,
                                   const GridFunction& inFunction,
                                   GridFunction& outFunction );
-#ifdef HAVE_NOT_CXX11
-   template< typename GridFunction,
-             int XDifferenceOrder,
-             int YDifferenceOrder,
-             int ZDifferenceOrder,
-             int XDifferenceDirection,
-             int YDifferenceDirection,
-             int ZDifferenceDirection >
-   static RealType getDifference( const GridType& grid,
-                                  const CellType& cell,
-                                  const GridFunction& function );
    template< typename GridFunction,
              int XDifferenceOrder,
              int YDifferenceOrder,
@@ -142,8 +90,6 @@ class FiniteDifferences< Meshes::Grid< 2, Real, Device, Index > >
    static RealType getDifference( const GridType& grid,
                                   const CellType& cell,
                                   const GridFunction& function );
 template< typename Real, typename Device, typename Index >
@@ -158,18 +104,6 @@ class FiniteDifferences< Meshes::Grid< 3, Real, Device, Index > >
    //typedef typename GridType::CoordinatesType CoordinatesType;
    typedef typename GridType::Cell CellType;
-#ifdef HAVE_NOT_CXX11
-   template< typename GridFunction,
-             int XDifferenceOrder,
-             int YDifferenceOrder,
-             int ZDifferenceOrder,
-             int XDifferenceDirection,
-             int YDifferenceDirection,
-             int ZDifferenceDirection >
-   static RealType getDifference( const GridType& grid,
-                                  const GridFunction& inFunction,
-                                  GridFunction& outFunction );
    template< typename GridFunction,
              int XDifferenceOrder,
              int YDifferenceOrder,
@@ -180,20 +114,7 @@ class FiniteDifferences< Meshes::Grid< 3, Real, Device, Index > >
    static RealType getDifference( const GridType& grid,
                                   const GridFunction& inFunction,
                                   GridFunction& outFunction );
-#ifdef HAVE_NOT_CXX11
-   template< typename GridFunction,
-             int XDifferenceOrder,
-             int YDifferenceOrder,
-             int ZDifferenceOrder,
-             int XDifferenceDirection,
-             int YDifferenceDirection,
-             int ZDifferenceDirection >
-   static RealType getDifference( const GridType& grid,
-                                  const CellType& cell,
-                                  const GridFunction& function );
    template< typename GridFunction,
              int XDifferenceOrder,
              int YDifferenceOrder,
@@ -204,11 +125,9 @@ class FiniteDifferences< Meshes::Grid< 3, Real, Device, Index > >
    static RealType getDifference( const GridType& grid,
                                   const CellType& cell,
                                   const GridFunction& function );
 } // namespace Operators
 } // namespace TNL
-#include <TNL/Operators/FiniteDifferences_impl.h>
\ No newline at end of file
+#include <TNL/Operators/FiniteDifferences_impl.h>
diff --git a/src/TNL/Operators/FiniteDifferences_impl.h b/src/TNL/Operators/FiniteDifferences_impl.h
index 51cc742999d7fc40bdb2e5fd75363ecc3ec6a335..3d5b5bace6fa1cd9d9566581fbe113ebba06437d 100644
--- a/src/TNL/Operators/FiniteDifferences_impl.h
+++ b/src/TNL/Operators/FiniteDifferences_impl.h
@@ -69,22 +69,22 @@ Real FiniteDifferences< Meshes::Grid< 1, Real, Device, Index > >::getDifference(
    if( YDifferenceOrder > 0 || ZDifferenceOrder > 0 )
       return 0.0;
    const RealType hx = grid.getSpaceSteps().x();
-   auto neighbourEntities = cell.getNeighbourEntities();
+   auto neighborEntities = cell.getNeighborEntities();
    IndexType cellIndex = grid.getEntityIndex( cell );
    if( XDifferenceOrder == 1 )
       if( XDifferenceDirection == 0 )
-         return ( function[ neighbourEntities.template getEntityIndex< 1 >() ] -
-                  function[ neighbourEntities.template getEntityIndex< -1 >() ] ) / ( 2.0 * hx );
+         return ( function[ neighborEntities.template getEntityIndex< 1 >() ] -
+                  function[ neighborEntities.template getEntityIndex< -1 >() ] ) / ( 2.0 * hx );
-         return ( function[ neighbourEntities.template getEntityIndex< XDifferenceDirection >() ] -
+         return ( function[ neighborEntities.template getEntityIndex< XDifferenceDirection >() ] -
                   function[ cellIndex ] ) / ( XDifferenceDirection * hx );
    if( XDifferenceOrder == 2 )
-      return ( function[ neighbourEntities.template getEntityIndex< 1 >() ] -
+      return ( function[ neighborEntities.template getEntityIndex< 1 >() ] -
                2.0 * function[ cellIndex ] +
-               function[ neighbourEntities.template getEntityIndex< -1 >() ] ) / (  hx * hx );
+               function[ neighborEntities.template getEntityIndex< -1 >() ] ) / (  hx * hx );
@@ -121,33 +121,33 @@ Real FiniteDifferences< Meshes::Grid< 2, Real, Device, Index > >::getDifference(
    if( ZDifferenceOrder > 0 )
       return 0.0;
-   auto neighbourEntities = cell.getNeighbourEntities();
+   auto neighborEntities = cell.getNeighborEntities();
    IndexType cellIndex = grid.getEntityIndex( cell );
    if( XDifferenceOrder == 1 )
       const RealType hx = grid.getSpaceSteps().x();
-      return ( function[ neighbourEntities.template getEntityIndex< XDifferenceDirection, 0 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< XDifferenceDirection, 0 >( cellIndex ) ] -
                function[ cellIndex ] ) / ( XDifferenceDirection * hx );
    if( XDifferenceOrder == 2 )
       const RealType hx = grid.getSpaceSteps().x();
-      return ( function[ neighbourEntities.template getEntityIndex< 1, 0 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 1, 0 >( cellIndex ) ] -
                2.0 * function[ cellIndex ] +
-               function[ neighbourEntities.template getEntityIndex< -1, 0 >( cellIndex ) ] ) / (  hx * hx );
+               function[ neighborEntities.template getEntityIndex< -1, 0 >( cellIndex ) ] ) / (  hx * hx );
    if( YDifferenceOrder == 1 )
       const RealType hy = grid.getSpaceSteps().y();
-      return ( function[ neighbourEntities.template getEntityIndex< 0, YDifferenceDirection >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 0, YDifferenceDirection >( cellIndex ) ] -
                function[ cellIndex ] ) / ( YDifferenceDirection * hy );
    if( YDifferenceOrder == 2 )
       const RealType hy = grid.getSpaceSteps().y();
-      return ( function[ neighbourEntities.template getEntityIndex< 0, 1 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 0, 1 >( cellIndex ) ] -
                2.0 * function[ cellIndex ] +
-               function[ neighbourEntities.template getEntityIndex< 0, -1 >( cellIndex ) ] ) / (  hy * hy );
+               function[ neighborEntities.template getEntityIndex< 0, -1 >( cellIndex ) ] ) / (  hy * hy );
@@ -184,47 +184,47 @@ Real FiniteDifferences< Meshes::Grid< 3, Real, Device, Index > >::getDifference(
                                                                                const CellType& cell,
                                                                                const GridFunction& function )
-   auto neighbourEntities = cell.getNeighbourEntities();
+   auto neighborEntities = cell.getNeighborEntities();
    IndexType cellIndex = grid.getEntityIndex( cell );
    if( XDifferenceOrder == 1 )
       const RealType hx = grid.getSpaceSteps().x();
-      return ( function[ neighbourEntities.template getEntityIndex< XDifferenceDirection, 0, 0 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< XDifferenceDirection, 0, 0 >( cellIndex ) ] -
                function[ cellIndex ] ) / ( XDifferenceDirection * hx );
    if( XDifferenceOrder == 2 )
       const RealType hx = grid.getSpaceSteps().x();
-      return ( function[ neighbourEntities.template getEntityIndex< 1, 0, 0 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 1, 0, 0 >( cellIndex ) ] -
                2.0 * function[ cellIndex ] +
-               function[ neighbourEntities.template getEntityIndex< -1, 0, 0 >( cellIndex ) ] ) / (  hx * hx );
+               function[ neighborEntities.template getEntityIndex< -1, 0, 0 >( cellIndex ) ] ) / (  hx * hx );
    if( YDifferenceOrder == 1 )
       const RealType hy = grid.getSpaceSteps().y();
-      return ( function[ neighbourEntities.template getEntityIndex< 0, YDifferenceDirection, 0 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 0, YDifferenceDirection, 0 >( cellIndex ) ] -
                function[ cellIndex ] ) / ( YDifferenceDirection * hy );
    if( YDifferenceOrder == 2 )
       const RealType hy = grid.getSpaceSteps().y();
-      return ( function[ neighbourEntities.template getEntityIndex< 0, 1, 0 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 0, 1, 0 >( cellIndex ) ] -
                2.0 * function[ cellIndex ] +
-               function[ neighbourEntities.template getEntityIndex< 0, -1, 0 >( cellIndex ) ] ) / (  hy * hy );
+               function[ neighborEntities.template getEntityIndex< 0, -1, 0 >( cellIndex ) ] ) / (  hy * hy );
    if( ZDifferenceOrder == 1 )
       const RealType hz = grid.getSpaceSteps().z();
-      return ( function[ neighbourEntities.template getEntityIndex< 0, 0, ZDifferenceDirection >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 0, 0, ZDifferenceDirection >( cellIndex ) ] -
                function[ cellIndex ] ) / ( ZDifferenceDirection * hz );
    if( ZDifferenceOrder == 2 )
       const RealType hz = grid.getSpaceSteps().z();
-      return ( function[ neighbourEntities.template getEntityIndex< 0, 0, 1 >( cellIndex ) ] -
+      return ( function[ neighborEntities.template getEntityIndex< 0, 0, 1 >( cellIndex ) ] -
                2.0 * function[ cellIndex ] +
-               function[ neighbourEntities.template getEntityIndex< 0, 0, -1 >( cellIndex ) ] ) / (  hz * hz );
+               function[ neighborEntities.template getEntityIndex< 0, 0, -1 >( cellIndex ) ] ) / (  hz * hz );
diff --git a/src/TNL/Operators/NeumannBoundaryConditions.h b/src/TNL/Operators/NeumannBoundaryConditions.h
index aa39235068ea28f07258eea3e136aaba379f1e71..15cfbe85c68e8e986e8d50dc8f1c83c24c36fb7b 100644
--- a/src/TNL/Operators/NeumannBoundaryConditions.h
+++ b/src/TNL/Operators/NeumannBoundaryConditions.h
@@ -19,7 +19,7 @@ namespace Operators {
 template< typename Mesh,
           typename Function,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class NeumannBoundaryConditions
@@ -121,13 +121,13 @@ class NeumannBoundaryConditions< Meshes::Grid< 1, MeshReal, Device, MeshIndex >,
                               const RealType& time = 0 ) const
       const MeshType& mesh = entity.getMesh();
-      const auto& neighbourEntities = entity.getNeighbourEntities();
+      const auto& neighborEntities = entity.getNeighborEntities();
       const IndexType& index = entity.getIndex();
       if( entity.getCoordinates().x() == 0 )
-         return u[ neighbourEntities.template getEntityIndex< 1 >() ] + entity.getMesh().getSpaceSteps().x() * 
+         return u[ neighborEntities.template getEntityIndex< 1 >() ] + entity.getMesh().getSpaceSteps().x() * 
             Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
-         return u[ neighbourEntities.template getEntityIndex< -1 >() ] + entity.getMesh().getSpaceSteps().x() * 
+         return u[ neighborEntities.template getEntityIndex< -1 >() ] + entity.getMesh().getSpaceSteps().x() * 
             Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );   
@@ -154,19 +154,19 @@ class NeumannBoundaryConditions< Meshes::Grid< 1, MeshReal, Device, MeshIndex >,
                                      Matrix& matrix,
                                      Vector& b ) const
-         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const auto& neighborEntities = entity.getNeighborEntities();
          const IndexType& index = entity.getIndex();
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
          if( entity.getCoordinates().x() == 0 )
             matrixRow.setElement( 0, index, 1.0 );
-            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1 >(), -1.0 );
+            matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 1 >(), -1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().x() * 
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
-            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1 >(), -1.0 );
+            matrixRow.setElement( 0, neighborEntities.template getEntityIndex< -1 >(), -1.0 );
             matrixRow.setElement( 1, index, 1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -215,27 +215,27 @@ class NeumannBoundaryConditions< Meshes::Grid< 2, MeshReal, Device, MeshIndex >,
                                  const RealType& time = 0 ) const
          const MeshType& mesh = entity.getMesh();
-         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const auto& neighborEntities = entity.getNeighborEntities();
          const IndexType& index = entity.getIndex();
          if( entity.getCoordinates().x() == 0 )
-            return u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
+            return u[ neighborEntities.template getEntityIndex< 1, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
-            return u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
+            return u[ neighborEntities.template getEntityIndex< -1, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().y() == 0 )
-            return u[ neighbourEntities.template getEntityIndex< 0, 1 >() ] + entity.getMesh().getSpaceSteps().y() *
+            return u[ neighborEntities.template getEntityIndex< 0, 1 >() ] + entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          // The following line is commented to avoid compiler warning
          //if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
-            return u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] + entity.getMesh().getSpaceSteps().y() *
+            return u[ neighborEntities.template getEntityIndex< 0, -1 >() ] + entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -261,19 +261,19 @@ class NeumannBoundaryConditions< Meshes::Grid< 2, MeshReal, Device, MeshIndex >,
                               Matrix& matrix,
                               Vector& b ) const
-         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const auto& neighborEntities = entity.getNeighborEntities();
          const IndexType& index = entity.getIndex();
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
          if( entity.getCoordinates().x() == 0 )
             matrixRow.setElement( 0, index,                                                1.0 );
-            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1, 0 >(), -1.0 );
+            matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 1, 0 >(), -1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
-            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1, 0 >(), -1.0 );
+            matrixRow.setElement( 0, neighborEntities.template getEntityIndex< -1, 0 >(), -1.0 );
             matrixRow.setElement( 1, index,                                                 1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -281,13 +281,13 @@ class NeumannBoundaryConditions< Meshes::Grid< 2, MeshReal, Device, MeshIndex >,
          if( entity.getCoordinates().y() == 0 )
             matrixRow.setElement( 0, index,                                                1.0 );
-            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 1 >(), -1.0 );
+            matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 0, 1 >(), -1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
-            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1 >(), -1.0 );
+            matrixRow.setElement( 0, neighborEntities.template getEntityIndex< 0, -1 >(), -1.0 );
             matrixRow.setElement( 1, index,                                                 1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -334,37 +334,37 @@ class NeumannBoundaryConditions< Meshes::Grid< 3, MeshReal, Device, MeshIndex >,
                                  const RealType& time = 0 ) const
          const MeshType& mesh = entity.getMesh();
-         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const auto& neighborEntities = entity.getNeighborEntities();
          const IndexType& index = entity.getIndex();
          if( entity.getCoordinates().x() == 0 )
-            return u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
+            return u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
-            return u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
+            return u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ] + entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().y() == 0 )
-            return u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] + entity.getMesh().getSpaceSteps().y() *
+            return u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] + entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
-            return u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] + entity.getMesh().getSpaceSteps().y() *
+            return u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ] + entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().z() == 0 )
-            return u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] + entity.getMesh().getSpaceSteps().z() *
+            return u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] + entity.getMesh().getSpaceSteps().z() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          // The following line is commented to avoid compiler warning
          //if( entity.getCoordinates().z() == entity.getMesh().getDimensions().z() - 1 )
-            return u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] + entity.getMesh().getSpaceSteps().z() *
+            return u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ] + entity.getMesh().getSpaceSteps().z() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -391,19 +391,19 @@ class NeumannBoundaryConditions< Meshes::Grid< 3, MeshReal, Device, MeshIndex >,
                                      Matrix& matrix,
                                      Vector& b ) const
-         const auto& neighbourEntities = entity.getNeighbourEntities();
+         const auto& neighborEntities = entity.getNeighborEntities();
          const IndexType& index = entity.getIndex();
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
          if( entity.getCoordinates().x() == 0 )
             matrixRow.setElement( 0, index,                                                   1.0 );
-            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1, 0, 0 >(), -1.0 );
+            matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 1, 0, 0 >(), -1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 )
-            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1, 0, 0 >(), -1.0 );
+            matrixRow.setElement( 0, neighborEntities.template getEntityIndex< -1, 0, 0 >(), -1.0 );
             matrixRow.setElement( 1, index,                                                    1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().x() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -411,13 +411,13 @@ class NeumannBoundaryConditions< Meshes::Grid< 3, MeshReal, Device, MeshIndex >,
          if( entity.getCoordinates().y() == 0 )
             matrixRow.setElement( 0, index,                                                   1.0 );
-            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 1, 0 >(), -1.0 );
+            matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 0, 1, 0 >(), -1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().y() * 
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 )
-            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1, 0 >(), -1.0 );
+            matrixRow.setElement( 0, neighborEntities.template getEntityIndex< 0, -1, 0 >(), -1.0 );
             matrixRow.setElement( 1, index,                                                    1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().y() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
@@ -425,13 +425,13 @@ class NeumannBoundaryConditions< Meshes::Grid< 3, MeshReal, Device, MeshIndex >,
          if( entity.getCoordinates().z() == 0 )
             matrixRow.setElement( 0, index,                                                   1.0 );
-            matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 0, 1 >(), -1.0 );
+            matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 0, 0, 1 >(), -1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().z() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
          if( entity.getCoordinates().z() == entity.getMesh().getDimensions().z() - 1 )
-            matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, 0, -1 >(), -1.0 );
+            matrixRow.setElement( 0, neighborEntities.template getEntityIndex< 0, 0, -1 >(), -1.0 );
             matrixRow.setElement( 1, index,                                                    1.0 );
             b[ index ] = entity.getMesh().getSpaceSteps().z() *
                Functions::FunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time );
diff --git a/src/TNL/Operators/Operator.h b/src/TNL/Operators/Operator.h
index b18e31fc4fbc0b453840733871afdbcd360afc8f..1a78fd5970a73650781f01fd593f40563797a432 100644
--- a/src/TNL/Operators/Operator.h
+++ b/src/TNL/Operators/Operator.h
@@ -20,7 +20,7 @@ template< typename Mesh,
           int PreimageEntitiesDimension = Mesh::getMeshDimension(),
           int ImageEntitiesDimension = Mesh::getMeshDimension(),
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class Operator : public Functions::Domain< Mesh::getMeshDimension(), DomainType >
@@ -28,12 +28,12 @@ class Operator : public Functions::Domain< Mesh::getMeshDimension(), DomainType
       typedef Mesh MeshType;
       typedef typename MeshType::RealType MeshRealType;
       typedef typename MeshType::DeviceType DeviceType;
-      typedef typename MeshType::IndexType MeshIndexType;
+      typedef typename MeshType::GlobalIndexType MeshIndexType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef void ExactOperatorType;
-      constexpr static int getDimension() { return MeshType::getMeshDimension(); }
+      constexpr static int getMeshDimension() { return MeshType::getMeshDimension(); }
       constexpr static int getPreimageEntitiesDimension() { return PreimageEntitiesDimension; }
       constexpr static int getImageEntitiesDimension() { return ImageEntitiesDimension; }
diff --git a/src/TNL/Operators/diffusion/CMakeLists.txt b/src/TNL/Operators/diffusion/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h
index 93e5d48e95acc6b9a66378d1f449791925900ec0..efb17555547b53da1b850b789b6956674c1abbdf 100644
--- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h
+++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h
@@ -25,7 +25,7 @@ template< typename Mesh,
           typename NonlinearDiffusionOperator,
 	  typename OperatorQ,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class FiniteVolumeNonlinearOperator
diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h
index 7242115f36088d30eb65d192ac18ec98e24474a5..083160467875cc0e4f40b15c63b7cf59c222a68b 100644
--- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h
+++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h
@@ -130,14 +130,14 @@ operator()( const MeshEntity& entity,
             const Vector& u,
             const Real& time ) const
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const IndexType& cellIndex = entity.getIndex();
    return operatorQ( entity, u, time ) * 
-      ( (  u[ neighbourEntities.template getEntityIndex<  1, 0 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -2, 0 >() / operatorQ.operator()( entity, u, time, 1 )
-      + (  u[ neighbourEntities.template getEntityIndex<  0, 1 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< 0, -2 >() / operatorQ.operator()( entity, u, time, 0, 1 ) 
-      - ( -u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] + u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -2, 0 >() / operatorQ.operator()( entity, u, time, -1)
-      - ( -u[ neighbourEntities.template getEntityIndex<  0,-1 >() ] + u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< 0, -2 >() / operatorQ.operator()( entity, u, time, 0, -1) );
+      ( (  u[ neighborEntities.template getEntityIndex<  1, 0 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -2, 0 >() / operatorQ.operator()( entity, u, time, 1 )
+      + (  u[ neighborEntities.template getEntityIndex<  0, 1 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< 0, -2 >() / operatorQ.operator()( entity, u, time, 0, 1 ) 
+      - ( -u[ neighborEntities.template getEntityIndex< -1, 0 >() ] + u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -2, 0 >() / operatorQ.operator()( entity, u, time, -1)
+      - ( -u[ neighborEntities.template getEntityIndex<  0,-1 >() ] + u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< 0, -2 >() / operatorQ.operator()( entity, u, time, 0, -1) );
 template< typename MeshReal,
@@ -180,7 +180,7 @@ setMatrixElements( const RealType& time,
                     Matrix& matrix ) const
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
    const RealType aCoef = - tau * operatorQ.operator()( entity, u, time ) * mesh.template getSpaceStepsProducts< 0, -2 >() / 
                        operatorQ.operator()( entity, u, time, 0, -1 );
    const RealType bCoef = - tau * operatorQ.operator()( entity, u, time ) * mesh.template getSpaceStepsProducts< -2, 0 >() / 
@@ -194,11 +194,11 @@ setMatrixElements( const RealType& time,
                        operatorQ.operator()( entity, u, time, 1 );
    const RealType eCoef = - tau * operatorQ.operator()( entity, u, time ) * mesh.template getSpaceStepsProducts< 0, -2 >() / 
                        operatorQ.operator()(  entity, u, time, 0, 1 );
-   matrixRow.setElement( 0, neighbourEntities.template getEntityIndex<  0, -1 >(), aCoef );
-   matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< -1,  0 >(), bCoef );
+   matrixRow.setElement( 0, neighborEntities.template getEntityIndex<  0, -1 >(), aCoef );
+   matrixRow.setElement( 1, neighborEntities.template getEntityIndex< -1,  0 >(), bCoef );
    matrixRow.setElement( 2, entity.getIndex(),                                     cCoef );
-   matrixRow.setElement( 3, neighbourEntities.template getEntityIndex<  1,  0 >(), dCoef );
-   matrixRow.setElement( 4, neighbourEntities.template getEntityIndex<  0,  1 >(), eCoef );
+   matrixRow.setElement( 3, neighborEntities.template getEntityIndex<  1,  0 >(), dCoef );
+   matrixRow.setElement( 4, neighborEntities.template getEntityIndex<  0,  1 >(), eCoef );
 template< typename MeshReal,
@@ -233,21 +233,21 @@ operator()( const MeshEntity& entity,
             const Vector& u,
             const Real& time ) const
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const IndexType& cellIndex = entity.getIndex();
    return operatorQ( entity, u, time ) * 
-      ( (u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ]) 
+      ( (u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ]) 
           * mesh.template getSpaceStepsProducts< -2, 0, 0 >() / operatorQ( entity, u, time, 1 )
-          + ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, -2, 0 >()/
+          + ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, -2, 0 >()/
           operatorQ( entity, u, time, 0, 1 ) 
-          + ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, 0, -2 >()/
+          + ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, 0, -2 >()/
           operatorQ( entity, u, time, 0, 0, 1 ) 
-          - ( - u[ neighbourEntities.template getEntityIndex< -1,0,0 >() ]  + u[ cellIndex ]) 
+          - ( - u[ neighborEntities.template getEntityIndex< -1,0,0 >() ]  + u[ cellIndex ]) 
           * mesh.template getSpaceStepsProducts< -2, 0, 0 >() / operatorQ( entity, u, time, -1)
-          -( - u[ neighbourEntities.template getEntityIndex< 0,-1,0 >() ] + u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, -2, 0 >()
+          -( - u[ neighborEntities.template getEntityIndex< 0,-1,0 >() ] + u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, -2, 0 >()
           /operatorQ( entity, u, time, 0, -1) 
-          -( - u[ neighbourEntities.template getEntityIndex< 0,0,-1 >() ] + u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, 0, -2 >()
+          -( - u[ neighborEntities.template getEntityIndex< 0,0,-1 >() ] + u[ cellIndex ]) * mesh.template getSpaceStepsProducts< 0, 0, -2 >()
           /operatorQ( entity, u, time, 0, 0, -1) );
@@ -293,7 +293,7 @@ setMatrixElements( const RealType& time,
                     Matrix& matrix ) const
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
    const RealType aCoef = - tau * operatorQ( entity, u, time ) *
                        mesh.template getSpaceStepsProducts< 0, 0, -2 >() / operatorQ.operator()( entity, u, time, 0, 0, -1 );
    const RealType bCoef = - tau * operatorQ( entity, u, time ) * 
@@ -313,13 +313,13 @@ setMatrixElements( const RealType& time,
                        mesh.template getSpaceStepsProducts< 0, -2, 0 >() / operatorQ.operator()( entity, u, time, 0, 1, 0 );
    const RealType gCoef = - tau * operatorQ.operator()( entity, u, time ) * 
                        mesh.template getSpaceStepsProducts< 0, 0, -2 >() / operatorQ.operator()( entity, u, time, 0, 0, 1 );
-   matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0,0,-1 >(), aCoef );
-   matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0,-1,0 >(), bCoef );
-   matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< -1,0,0 >(), cCoef );
+   matrixRow.setElement( 0, neighborEntities.template getEntityIndex< 0,0,-1 >(), aCoef );
+   matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 0,-1,0 >(), bCoef );
+   matrixRow.setElement( 2, neighborEntities.template getEntityIndex< -1,0,0 >(), cCoef );
    matrixRow.setElement( 3, entity.getIndex(),                                     dCoef );
-   matrixRow.setElement( 4, neighbourEntities.template getEntityIndex< 1,0,0 >(),  eCoef );
-   matrixRow.setElement( 5, neighbourEntities.template getEntityIndex< 0,1,0 >(),  fCoef );
-   matrixRow.setElement( 6, neighbourEntities.template getEntityIndex< 0,0,1 >(),  gCoef );
+   matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 1,0,0 >(),  eCoef );
+   matrixRow.setElement( 5, neighborEntities.template getEntityIndex< 0,1,0 >(),  fCoef );
+   matrixRow.setElement( 6, neighborEntities.template getEntityIndex< 0,0,1 >(),  gCoef );
 } // namespace Operators
diff --git a/src/TNL/Operators/diffusion/LinearDiffusion.h b/src/TNL/Operators/diffusion/LinearDiffusion.h
index 8844e547ad9f8b9f53f12b47eddb08caf8d30f7c..ff9c05f03238eb5e10869f4a1dc2d8a6cd2d821a 100644
--- a/src/TNL/Operators/diffusion/LinearDiffusion.h
+++ b/src/TNL/Operators/diffusion/LinearDiffusion.h
@@ -27,7 +27,7 @@ namespace Operators {
 template< typename Mesh,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class LinearDiffusion
@@ -52,7 +52,7 @@ class LinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, Inde
       typedef Index IndexType;
       typedef ExactLinearDiffusion< 1 > ExactOperatorType;
-      static const int Dimension = MeshType::meshDimension;
+      static const int Dimension = MeshType::getMeshDimension();
       static constexpr int getDimension() { return Dimension; }
@@ -103,7 +103,7 @@ class LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind
       typedef Index IndexType;
       typedef ExactLinearDiffusion< 2 > ExactOperatorType;
-      static const int Dimension = MeshType::meshDimension;
+      static const int Dimension = MeshType::getMeshDimension();
       static constexpr int getDimension() { return Dimension; }
@@ -153,7 +153,7 @@ class LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Ind
       typedef Index IndexType;
       typedef ExactLinearDiffusion< 3 > ExactOperatorType;
-      static const int Dimension = MeshType::meshDimension;
+      static const int Dimension = MeshType::getMeshDimension();
       static constexpr int getDimension() { return Dimension; }
diff --git a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
index c8bc8a3d9b9f30254d7c4247ac356715543b2c02..83a20829ccc4f46a56eb80b1e474990db23856da 100644
--- a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
+++ b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h
@@ -52,13 +52,13 @@ operator()( const PreimageFunction& u,
             const MeshEntity& entity,
             const Real& time ) const
-   static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimension." );
+   static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." );
    static_assert( PreimageFunction::getEntitiesDimension() == 1, "Wrong preimage function" );
-   const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< - 2 >();
-   return ( u[ neighbourEntities.template getEntityIndex< -1 >() ]
+   return ( u[ neighborEntities.template getEntityIndex< -1 >() ]
             - 2.0 * u[ entity.getIndex() ]
-            + u[ neighbourEntities.template getEntityIndex< 1 >() ] ) * hxSquareInverse;
+            + u[ neighborEntities.template getEntityIndex< 1 >() ] ) * hxSquareInverse;
 template< typename MeshReal,
@@ -98,15 +98,15 @@ setMatrixElements( const PreimageFunction& u,
                    Matrix& matrix,
                    Vector& b ) const
-   static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimension." );
+   static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimensions." );
    static_assert( PreimageFunction::getEntitiesDimension() == 1, "Wrong preimage function" );
-   const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
    const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
    const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2 >();
-   matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1 >(),      - lambdaX );
+   matrixRow.setElement( 0, neighborEntities.template getEntityIndex< -1 >(),      - lambdaX );
    matrixRow.setElement( 1, index,                                              2.0 * lambdaX );
-   matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< 1 >(),       - lambdaX );
+   matrixRow.setElement( 2, neighborEntities.template getEntityIndex< 1 >(),       - lambdaX );
 template< typename MeshReal,
@@ -157,15 +157,15 @@ operator()( const PreimageFunction& u,
             const EntityType& entity,
             const Real& time ) const
-   static_assert( EntityType::entityDimension == 2, "Wrong mesh entity dimension." );
+   static_assert( EntityType::getEntityDimension() == 2, "Wrong mesh entity dimensions." );
    static_assert( PreimageFunction::getEntitiesDimension() == 2, "Wrong preimage function" );
-   const typename EntityType::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename EntityType::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2, 0 >();
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
-   return ( u[ neighbourEntities.template getEntityIndex< -1,  0 >() ]
-          + u[ neighbourEntities.template getEntityIndex<  1,  0 >() ] ) * hxSquareInverse +
-          ( u[ neighbourEntities.template getEntityIndex<  0, -1 >() ]
-          + u[ neighbourEntities.template getEntityIndex<  0,  1 >() ] ) * hySquareInverse
+   return ( u[ neighborEntities.template getEntityIndex< -1,  0 >() ]
+          + u[ neighborEntities.template getEntityIndex<  1,  0 >() ] ) * hxSquareInverse +
+          ( u[ neighborEntities.template getEntityIndex<  0, -1 >() ]
+          + u[ neighborEntities.template getEntityIndex<  0,  1 >() ] ) * hySquareInverse
           - 2.0 * u[ entity.getIndex() ] * ( hxSquareInverse + hySquareInverse );
@@ -189,18 +189,18 @@ setMatrixElements( const PreimageFunction& u,
                    Matrix& matrix,
                    Vector& b ) const
-   static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimension." );
+   static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimensions." );
    static_assert( PreimageFunction::getEntitiesDimension() == 2, "Wrong preimage function" );
    const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
    const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0 >();
    const RealType lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
-   matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1 >(), -lambdaY );
-   matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< -1, 0 >(), -lambdaX );
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
+   matrixRow.setElement( 0, neighborEntities.template getEntityIndex< 0, -1 >(), -lambdaY );
+   matrixRow.setElement( 1, neighborEntities.template getEntityIndex< -1, 0 >(), -lambdaX );
    matrixRow.setElement( 2, index,                                                        2.0 * ( lambdaX + lambdaY ) );
-   matrixRow.setElement( 3, neighbourEntities.template getEntityIndex< 1, 0 >(),   -lambdaX );
-   matrixRow.setElement( 4, neighbourEntities.template getEntityIndex< 0, 1 >(),   -lambdaY );
+   matrixRow.setElement( 3, neighborEntities.template getEntityIndex< 1, 0 >(),   -lambdaX );
+   matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 0, 1 >(),   -lambdaY );
@@ -234,18 +234,18 @@ operator()( const PreimageFunction& u,
             const EntityType& entity,
             const Real& time ) const
-   static_assert( EntityType::entityDimension == 3, "Wrong mesh entity dimension." );
+   static_assert( EntityType::getEntityDimension() == 3, "Wrong mesh entity dimensions." );
    static_assert( PreimageFunction::getEntitiesDimension() == 3, "Wrong preimage function" );
-   const typename EntityType::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename EntityType::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >();
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >();
    const RealType& hzSquareInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >();
-   return (   u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ]
-            + u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] ) * hxSquareInverse +
-          (   u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ]
-            + u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] ) * hySquareInverse +
-          (   u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ]
-            + u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] ) * hzSquareInverse
+   return (   u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ]
+            + u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] ) * hxSquareInverse +
+          (   u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ]
+            + u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] ) * hySquareInverse +
+          (   u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ]
+            + u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] ) * hzSquareInverse
          - 2.0 * u[ entity.getIndex() ] * ( hxSquareInverse + hySquareInverse + hzSquareInverse );
@@ -286,21 +286,21 @@ setMatrixElements( const PreimageFunction& u,
                    Matrix& matrix,
                    Vector& b ) const
-   static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimension." );
+   static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimensions." );
    static_assert( PreimageFunction::getEntitiesDimension() == 3, "Wrong preimage function" );
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
    const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
    const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >();
    const RealType lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >();
    const RealType lambdaZ = tau * entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >();
-   matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, 0, -1 >(), -lambdaZ );
-   matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, -1, 0 >(), -lambdaY );
-   matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< -1, 0, 0 >(), -lambdaX );
+   matrixRow.setElement( 0, neighborEntities.template getEntityIndex< 0, 0, -1 >(), -lambdaZ );
+   matrixRow.setElement( 1, neighborEntities.template getEntityIndex< 0, -1, 0 >(), -lambdaY );
+   matrixRow.setElement( 2, neighborEntities.template getEntityIndex< -1, 0, 0 >(), -lambdaX );
    matrixRow.setElement( 3, index,                             2.0 * ( lambdaX + lambdaY + lambdaZ ) );
-   matrixRow.setElement( 4, neighbourEntities.template getEntityIndex< 1, 0, 0 >(),   -lambdaX );
-   matrixRow.setElement( 5, neighbourEntities.template getEntityIndex< 0, 1, 0 >(),   -lambdaY );
-   matrixRow.setElement( 6, neighbourEntities.template getEntityIndex< 0, 0, 1 >(),   -lambdaZ );
+   matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 1, 0, 0 >(),   -lambdaX );
+   matrixRow.setElement( 5, neighborEntities.template getEntityIndex< 0, 1, 0 >(),   -lambdaY );
+   matrixRow.setElement( 6, neighborEntities.template getEntityIndex< 0, 0, 1 >(),   -lambdaZ );
 } // namespace Operators
diff --git a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
index 5e3e4d59c2aa98d09d8cbee72a105bf2dcfb37c6..91f9848cbc9bc63faca5538913f1ec4c9a1701b0 100644
--- a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
+++ b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h
@@ -30,7 +30,7 @@ namespace Operators {
 template< typename Mesh,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType,
+          typename Index = typename Mesh::GlobalIndexType,
           bool EvaluateNonlinearityOnFly = false >
 class OneSidedMeanCurvature
    : public Operator< Mesh, Functions::MeshInteriorDomain, Mesh::getMeshDimension(), Mesh::getMeshDimension(), Real, Index >
diff --git a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h
index 606ab6a05ddca3474b9bfa7ed535e9d89380dbb7..b74abe0b06de5898e86679dcd537d9656cbfcc9b 100644
--- a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h
+++ b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h
@@ -26,7 +26,7 @@ namespace Operators {
 template< typename Mesh,
           typename Nonlinearity,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class OneSidedNonlinearDiffusion
@@ -69,12 +69,12 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >,
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const typename MeshEntity::MeshType& mesh = entity.getMesh();
          const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2 >();
          const IndexType& center = entity.getIndex();
-         const IndexType& east = neighbourEntities.template getEntityIndex<  1 >();
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
+         const IndexType& east = neighborEntities.template getEntityIndex<  1 >();
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
          const RealType& u_c = u[ center ];
          const RealType u_x_f = ( u[ east ] - u_c );
          const RealType u_x_b = ( u_c - u[ west ] );
@@ -105,10 +105,10 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >,
                                      Vector& b ) const
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const IndexType& center = entity.getIndex();
-         const IndexType& east = neighbourEntities.template getEntityIndex<  1 >();
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1 >();
+         const IndexType& east = neighborEntities.template getEntityIndex<  1 >();
+         const IndexType& west = neighborEntities.template getEntityIndex< -1 >();
          const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2 >();
          const RealType& nonlinearity_center = this->nonlinearity[ center ];
          const RealType& nonlinearity_west = this->nonlinearity[ west ];
@@ -163,15 +163,15 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const typename MeshEntity::MeshType& mesh = entity.getMesh();
          const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2,  0 >();
          const RealType& hy_div = entity.getMesh().template getSpaceStepsProducts<  0, -2 >();
          const IndexType& center = entity.getIndex();
-         const IndexType& east = neighbourEntities.template getEntityIndex<  1, 0 >();
-         const IndexType& west = neighbourEntities.template getEntityIndex< -1, 0 >();
-         const IndexType& north = neighbourEntities.template getEntityIndex< 0,  1 >();
-         const IndexType& south = neighbourEntities.template getEntityIndex< 0, -1 >();
+         const IndexType& east = neighborEntities.template getEntityIndex<  1, 0 >();
+         const IndexType& west = neighborEntities.template getEntityIndex< -1, 0 >();
+         const IndexType& north = neighborEntities.template getEntityIndex< 0,  1 >();
+         const IndexType& south = neighborEntities.template getEntityIndex< 0, -1 >();
          const RealType& u_c = u[ center ];
          const RealType u_x_f = ( u[ east ] - u_c );
          const RealType u_x_b = ( u_c - u[ west ] );
@@ -205,12 +205,12 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >
                                      Vector& b ) const
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const IndexType& center = entity.getIndex();
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >();
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >();
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >();
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >();
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >();
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >();
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();
          const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2,  0 >();
          const RealType lambda_y = tau * entity.getMesh().template getSpaceStepsProducts<  0, -2 >();
          const RealType& nonlinearity_center = this->nonlinearity[ center ];
@@ -272,18 +272,18 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >
                        const MeshEntity& entity,
                        const RealType& time = 0.0 ) const
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const typename MeshEntity::MeshType& mesh = entity.getMesh();
          const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >();
          const RealType& hy_div = entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >();
          const RealType& hz_div = entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >();
          const IndexType& center = entity.getIndex();
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >();
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >();
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >();
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >();
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >();
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >();
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >();
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >();
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType& u_c = u[ center ];
          const RealType u_x_f = ( u[ east ] - u_c );
@@ -322,14 +322,14 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >
                                      Vector& b ) const
          typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const IndexType& center = entity.getIndex();
-         const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >();
-         const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >();
-         const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >();
-         const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();
-         const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >();
-         const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();
+         const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >();
+         const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >();
+         const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >();
+         const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();
+         const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >();
+         const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();
          const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >();
diff --git a/src/TNL/Operators/euler/CMakeLists.txt b/src/TNL/Operators/euler/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/euler/fvm/CMakeLists.txt b/src/TNL/Operators/euler/fvm/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h
index d0c650b0a1bf3d4f1d9a90952a7548f2b9700506..b2cc2ff06d267933fbacb78be213a24a8496ef5f 100644
--- a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h
+++ b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h
@@ -147,14 +147,14 @@ void LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, Pressur
                                                                                                               RealType& rho_u2_t,
                                                                                                               const RealType& tau ) const
-   TNL_ASSERT( mesh, std::cerr << "No mesh has been binded with the Lax-Fridrichs scheme." );
-   TNL_ASSERT( pressureGradient, std::cerr << "No pressure gradient was set in the the Lax-Fridrichs scheme." )
+   TNL_ASSERT_TRUE( mesh, "No mesh has been binded with the Lax-Fridrichs scheme." );
+   TNL_ASSERT_TRUE( pressureGradient, "No pressure gradient was set in the the Lax-Fridrichs scheme." )
    const IndexType& c = centralVolume;
-   const IndexType e = this->mesh -> getElementNeighbour( centralVolume,  1,  0 );
-   const IndexType w = this->mesh -> getElementNeighbour( centralVolume, -1,  0 );
-   const IndexType n = this->mesh -> getElementNeighbour( centralVolume,  0,  1 );
-   const IndexType s = this->mesh -> getElementNeighbour( centralVolume,  0, -1 );
+   const IndexType e = this->mesh -> getElementNeighbor( centralVolume,  1,  0 );
+   const IndexType w = this->mesh -> getElementNeighbor( centralVolume, -1,  0 );
+   const IndexType n = this->mesh -> getElementNeighbor( centralVolume,  0,  1 );
+   const IndexType s = this->mesh -> getElementNeighbor( centralVolume,  0, -1 );
    const RealType u1_e = rho_u1[ e ] / regularize( rho[ e ] );
    const RealType u1_w = rho_u1[ w ] / regularize( rho[ w ] );
@@ -168,7 +168,7 @@ void LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, Pressur
    const RealType u2_w = rho_u2[ w ] / regularize( rho[ w ] );
-    * Get the central volume and its neighbours (east, north, west, south) coordinates
+    * Get the central volume and its neighbors (east, north, west, south) coordinates
    CoordinatesType c_coordinates, e_coordinates, n_coordinates, w_coordinates, s_coordinates;
    this->mesh -> getElementCoordinates( c, c_coordinates );
@@ -398,8 +398,8 @@ void LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, tnlIdenticalGridGeometr
                                                                                                                           RealType& rho_u2_t,
                                                                                                                           const RealType& tau ) const
-   TNL_ASSERT( mesh, std::cerr << "No mesh has been binded with the Lax-Fridrichs scheme." );
-   TNL_ASSERT( pressureGradient, std::cerr << "No pressure gradient was set in the the Lax-Fridrichs scheme." )
+   TNL_ASSERT_TRUE( mesh, "No mesh has been binded with the Lax-Fridrichs scheme." );
+   TNL_ASSERT_TRUE( pressureGradient, "No pressure gradient was set in the the Lax-Fridrichs scheme." )
    const IndexType& xSize = this->mesh -> getDimensions(). x();
    const IndexType& ySize = this->mesh -> getDimensions(). y();
@@ -407,10 +407,10 @@ void LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, tnlIdenticalGridGeometr
    const RealType hy = this->mesh -> getParametricStep(). y();
    const IndexType& c = centralVolume;
-   const IndexType e = this->mesh -> getElementNeighbour( centralVolume,  1,  0 );
-   const IndexType w = this->mesh -> getElementNeighbour( centralVolume, -1,  0 );
-   const IndexType n = this->mesh -> getElementNeighbour( centralVolume,  0,  1 );
-   const IndexType s = this->mesh -> getElementNeighbour( centralVolume,  0, -1 );
+   const IndexType e = this->mesh -> getElementNeighbor( centralVolume,  1,  0 );
+   const IndexType w = this->mesh -> getElementNeighbor( centralVolume, -1,  0 );
+   const IndexType n = this->mesh -> getElementNeighbor( centralVolume,  0,  1 );
+   const IndexType s = this->mesh -> getElementNeighbor( centralVolume,  0, -1 );
     * rho_t + ( rho u_1 )_x + ( rho u_2 )_y =  0
@@ -456,8 +456,8 @@ void LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, tnlIdenticalGridGeometr
                                                                                                                           RealType& e_t,
                                                                                                                           const RealType& tau ) const
-   TNL_ASSERT( mesh, std::cerr << "No mesh has been binded with the Lax-Fridrichs scheme." );
-   TNL_ASSERT( pressureGradient, std::cerr << "No pressure gradient was set in the the Lax-Fridrichs scheme." )
+   TNL_ASSERT_TRUE( mesh, "No mesh has been binded with the Lax-Fridrichs scheme." );
+   TNL_ASSERT_TRUE( pressureGradient, "No pressure gradient was set in the the Lax-Fridrichs scheme." )
    const IndexType& xSize = this->mesh -> getDimensions(). x();
    const IndexType& ySize = this->mesh -> getDimensions(). y();
@@ -465,10 +465,10 @@ void LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, tnlIdenticalGridGeometr
    const RealType hy = this->mesh -> getParametricStep(). y();
    const IndexType& c = centralVolume;
-   const IndexType e = this->mesh -> getElementNeighbour( centralVolume,  1,  0 );
-   const IndexType w = this->mesh -> getElementNeighbour( centralVolume, -1,  0 );
-   const IndexType n = this->mesh -> getElementNeighbour( centralVolume,  0,  1 );
-   const IndexType s = this->mesh -> getElementNeighbour( centralVolume,  0, -1 );
+   const IndexType e = this->mesh -> getElementNeighbor( centralVolume,  1,  0 );
+   const IndexType w = this->mesh -> getElementNeighbor( centralVolume, -1,  0 );
+   const IndexType n = this->mesh -> getElementNeighbor( centralVolume,  0,  1 );
+   const IndexType s = this->mesh -> getElementNeighbor( centralVolume,  0, -1 );
     * rho_t + ( rho u_1 )_x + ( rho u_2 )_y =  0
diff --git a/src/TNL/Operators/fdm/BackwardFiniteDifference.h b/src/TNL/Operators/fdm/BackwardFiniteDifference.h
index ecc94c6a4e34bdae4eb61b856a2513edf25da033..d816499f1e733c4a6cbca7f1a3dc1c0fefe3d320 100644
--- a/src/TNL/Operators/fdm/BackwardFiniteDifference.h
+++ b/src/TNL/Operators/fdm/BackwardFiniteDifference.h
@@ -22,7 +22,7 @@ template< typename Mesh,
           int YDifference = 0,
           int ZDifference = 0,
           typename RealType = typename Mesh::RealType,
-          typename IndexType = typename Mesh::IndexType >
+          typename IndexType = typename Mesh::GlobalIndexType >
 class BackwardFiniteDifference
diff --git a/src/TNL/Operators/fdm/CentralFiniteDifference.h b/src/TNL/Operators/fdm/CentralFiniteDifference.h
index a9514d9f3e9354505d72aa675f6cdc0a76a16382..90c9a5887897dd2664e2936514bb9255089c9e2c 100644
--- a/src/TNL/Operators/fdm/CentralFiniteDifference.h
+++ b/src/TNL/Operators/fdm/CentralFiniteDifference.h
@@ -22,7 +22,7 @@ template< typename Mesh,
           int YDifference = 0,
           int ZDifference = 0,
           typename RealType = typename Mesh::RealType,
-          typename IndexType = typename Mesh::IndexType >
+          typename IndexType = typename Mesh::GlobalIndexType >
 class CentralFiniteDifference
diff --git a/src/TNL/Operators/fdm/FiniteDifferences_1D.h b/src/TNL/Operators/fdm/FiniteDifferences_1D.h
index 127d454d159ff058ce5e3d7ec5afd7e6b820f228..622e4c4585169179df679ca352610d06dd2ec4e1 100644
--- a/src/TNL/Operators/fdm/FiniteDifferences_1D.h
+++ b/src/TNL/Operators/fdm/FiniteDifferences_1D.h
@@ -66,10 +66,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 1 >()] - u_c ) * hxDiv;
+         return ( u[ neighborEntities.template getEntityIndex< 1 >()] - u_c ) * hxDiv;
@@ -93,10 +93,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u_c - u[ neighbourEntities.template getEntityIndex< -1 >()] ) * hxDiv;
+         return ( u_c - u[ neighborEntities.template getEntityIndex< -1 >()] ) * hxDiv;
@@ -120,10 +120,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1 >();
-         return ( u[ neighbourEntities.template getEntityIndex< 1 >() ] -
-                  u[ neighbourEntities.template getEntityIndex< -1 >() ] ) * ( 0.5 * hxDiv );
+         return ( u[ neighborEntities.template getEntityIndex< 1 >() ] -
+                  u[ neighborEntities.template getEntityIndex< -1 >() ] ) * ( 0.5 * hxDiv );
@@ -147,12 +147,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 2 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 2 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 1 >() ] ) * hxSquareDiv;
@@ -173,12 +173,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< -2 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< -2 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< -1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< -1 >() ] ) * hxSquareDiv;
@@ -199,12 +199,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 1 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 1 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< -1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< -1 >() ] ) * hxSquareDiv;
diff --git a/src/TNL/Operators/fdm/FiniteDifferences_2D.h b/src/TNL/Operators/fdm/FiniteDifferences_2D.h
index d287b50d210d36abdbe9d373bc4b1567aa70968c..4ea67b93159038745e137e0c6decc17c234e720e 100644
--- a/src/TNL/Operators/fdm/FiniteDifferences_2D.h
+++ b/src/TNL/Operators/fdm/FiniteDifferences_2D.h
@@ -66,10 +66,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 1, 0 >()] - u_c ) * hxDiv;
+         return ( u[ neighborEntities.template getEntityIndex< 1, 0 >()] - u_c ) * hxDiv;
@@ -90,10 +90,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hyDiv = entity.getMesh().template getSpaceStepsProducts< 0, -1 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 1 >()] - u_c ) * hyDiv;
+         return ( u[ neighborEntities.template getEntityIndex< 0, 1 >()] - u_c ) * hyDiv;
@@ -117,10 +117,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1,  0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u_c - u[ neighbourEntities.template getEntityIndex< -1, 0 >()] ) * hxDiv;
+         return ( u_c - u[ neighborEntities.template getEntityIndex< -1, 0 >()] ) * hxDiv;
@@ -141,10 +141,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hyDiv = entity.getMesh().template getSpaceStepsProducts< 0, -1 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u_c - u[ neighbourEntities.template getEntityIndex< 0, -1 >()] ) * hyDiv;
+         return ( u_c - u[ neighborEntities.template getEntityIndex< 0, -1 >()] ) * hyDiv;
@@ -168,10 +168,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1, 0 >();
-         return ( u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] -
-                  u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] ) * ( 0.5 * hxDiv );
+         return ( u[ neighborEntities.template getEntityIndex< 1, 0 >() ] -
+                  u[ neighborEntities.template getEntityIndex< -1, 0 >() ] ) * ( 0.5 * hxDiv );
@@ -192,10 +192,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hyDiv = entity.getMesh().template getSpaceStepsProducts< 0, -1 >();
-         return ( u[ neighbourEntities.template getEntityIndex< 0,  1 >() ] -
-                  u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] ) * ( 0.5 * hyDiv );
+         return ( u[ neighborEntities.template getEntityIndex< 0,  1 >() ] -
+                  u[ neighborEntities.template getEntityIndex< 0, -1 >() ] ) * ( 0.5 * hyDiv );
@@ -220,12 +220,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2,0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 2, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 2, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 1, 0 >() ] ) * hxSquareDiv;
@@ -246,12 +246,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< -2, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< -2, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< -1, 0 >() ] ) * hxSquareDiv;
@@ -272,12 +272,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex<  1, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex<  1, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< -1, 0 >() ] ) * hxSquareDiv;
@@ -298,12 +298,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 2 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, 2 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, 1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, 1 >() ] ) * hxSquareDiv;
@@ -324,12 +324,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, -2 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, -2 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, -1 >() ] ) * hxSquareDiv;
@@ -351,12 +351,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          const Real& hySquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0,  1 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0,  1 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] ) * hySquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, -1 >() ] ) * hySquareDiv;
diff --git a/src/TNL/Operators/fdm/FiniteDifferences_3D.h b/src/TNL/Operators/fdm/FiniteDifferences_3D.h
index 569a1c7645e4bf5904875ac208dab390c601050d..895aeaddac1be89b57b308becf80b27cd9576c4d 100644
--- a/src/TNL/Operators/fdm/FiniteDifferences_3D.h
+++ b/src/TNL/Operators/fdm/FiniteDifferences_3D.h
@@ -33,10 +33,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1, 0, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >()] - u_c ) * hxDiv;
+         return ( u[ neighborEntities.template getEntityIndex< 1, 0, 0 >()] - u_c ) * hxDiv;
@@ -57,10 +57,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hyDiv = entity.getMesh().template getSpaceStepsProducts< 0, -1, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >()] - u_c ) * hyDiv;
+         return ( u[ neighborEntities.template getEntityIndex< 0, 1, 0 >()] - u_c ) * hyDiv;
@@ -81,10 +81,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hzDiv = entity.getMesh().template getSpaceStepsProducts< 0, 0, -1 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >()] - u_c ) * hzDiv;
+         return ( u[ neighborEntities.template getEntityIndex< 0, 0, 1 >()] - u_c ) * hzDiv;
@@ -108,10 +108,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1, 0, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u_c - u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >()] ) * hxDiv;
+         return ( u_c - u[ neighborEntities.template getEntityIndex< -1, 0, 0 >()] ) * hxDiv;
@@ -132,10 +132,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hyDiv = entity.getMesh().template getSpaceStepsProducts< 0, -1, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u_c - u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >()] ) * hyDiv;
+         return ( u_c - u[ neighborEntities.template getEntityIndex< 0, -1, 0 >()] ) * hyDiv;
@@ -156,10 +156,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hzDiv = entity.getMesh().template getSpaceStepsProducts< 0, 0, -1 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u_c - u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >()] ) * hzDiv;
+         return ( u_c - u[ neighborEntities.template getEntityIndex< 0, 0, -1 >()] ) * hzDiv;
@@ -183,10 +183,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1, 0, 0 >();
-         return ( u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] -
-                  u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] ) * ( 0.5 * hxDiv );
+         return ( u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] -
+                  u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ] ) * ( 0.5 * hxDiv );
@@ -207,10 +207,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hyDiv = entity.getMesh().template getSpaceStepsProducts< 0, -1, 0 >();
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] -
-                  u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] ) * ( 0.5 * hyDiv );
+         return ( u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] -
+                  u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ] ) * ( 0.5 * hyDiv );
@@ -231,10 +231,10 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hzDiv = entity.getMesh().template getSpaceStepsProducts< 0, 0, -1 >();
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] -
-                  u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] ) * ( 0.5 * hzDiv );
+         return ( u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] -
+                  u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ] ) * ( 0.5 * hzDiv );
@@ -258,12 +258,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 2, 0, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 2, 0, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] ) * hxSquareDiv;
@@ -284,12 +284,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< -2, 0, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< -2, 0, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ] ) * hxSquareDiv;
@@ -310,12 +310,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex<  1, 0, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex<  1, 0, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ] ) * hxSquareDiv;
@@ -336,12 +336,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 2, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, 2, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] ) * hxSquareDiv;
@@ -362,12 +362,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, -2, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, -2, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ] ) * hxSquareDiv;
@@ -388,12 +388,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hySquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0,  1, 0 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0,  1, 0 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] ) * hySquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ] ) * hySquareDiv;
@@ -414,12 +414,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 0, 2 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, 0, 2 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] ) * hxSquareDiv;
@@ -440,12 +440,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hxSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 0, -2 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, 0, -2 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] ) * hxSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ] ) * hxSquareDiv;
@@ -466,12 +466,12 @@ class FiniteDifferences<
       static Real getValue( const MeshFunction& u,
                             const MeshEntity& entity )
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          const Real& hzSquareDiv = entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >();
          const Real& u_c = u[ entity.getIndex() ];
-         return ( u[ neighbourEntities.template getEntityIndex< 0, 0,  1 >() ] -
+         return ( u[ neighborEntities.template getEntityIndex< 0, 0,  1 >() ] -
                   2.0 * u_c +
-                  u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] ) * hzSquareDiv;
+                  u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ] ) * hzSquareDiv;
diff --git a/src/TNL/Operators/fdm/ForwardFiniteDifference.h b/src/TNL/Operators/fdm/ForwardFiniteDifference.h
index 1de01680cd16c115aaaed34d95437537d3d67e88..9c75e05dadd03213494d1229fe814771ca0f540d 100644
--- a/src/TNL/Operators/fdm/ForwardFiniteDifference.h
+++ b/src/TNL/Operators/fdm/ForwardFiniteDifference.h
@@ -23,7 +23,7 @@ template< typename Mesh,
           int YDifference = 0,
           int ZDifference = 0,
           typename RealType = typename Mesh::RealType,
-          typename IndexType = typename Mesh::IndexType >
+          typename IndexType = typename Mesh::GlobalIndexType >
 class ForwardFiniteDifference
diff --git a/src/TNL/Operators/geometric/CoFVMGradientNorm.h b/src/TNL/Operators/geometric/CoFVMGradientNorm.h
index 47f168656a1def8d48aa8ff56f6531964a4ef98f..0380ecc223f8b1556cf379bfb2dd64a1197ae576 100644
--- a/src/TNL/Operators/geometric/CoFVMGradientNorm.h
+++ b/src/TNL/Operators/geometric/CoFVMGradientNorm.h
@@ -22,7 +22,7 @@ namespace Operators {
 template< typename Mesh,
           int MeshEntityDimension = Mesh::getMeshDimension(),
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class CoFVMGradientNorm
@@ -118,11 +118,11 @@ class CoFVMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, 0, Real,
          "The mesh function u must be stored on mesh cells.." );
       static_assert( MeshEntity::getMeshDimension() == 0,
          "The complementary finite volume gradient norm may be evaluated only on faces." );
-      const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.template getNeighbourEntities< 1 >();
+      const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.template getNeighborEntities< 1 >();
       const RealType& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1 >();
-      const RealType& u_x = ( u[ neighbourEntities.template getEntityIndex<  1 >() ] -
-                              u[ neighbourEntities.template getEntityIndex< -1 >() ] ) * hxDiv;
+      const RealType& u_x = ( u[ neighborEntities.template getEntityIndex<  1 >() ] -
+                              u[ neighborEntities.template getEntityIndex< -1 >() ] ) * hxDiv;
       return ::sqrt( this->epsSquare + ( u_x * u_x ) );
@@ -180,37 +180,37 @@ class CoFVMGradientNorm< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real
          "The mesh function u must be stored on mesh cells.." );
       static_assert( MeshEntity::getMeshDimension() == 1,
          "The complementary finite volume gradient norm may be evaluated only on faces." );
-      const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.template getNeighbourEntities< 2 >();
+      const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.template getNeighborEntities< 2 >();
       const RealType& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1,  0 >();
       const RealType& hyDiv = entity.getMesh().template getSpaceStepsProducts<  0, -1 >();
       if( entity.getOrientation().x() != 0.0 )
          const RealType u_x =
-            ( u[ neighbourEntities.template getEntityIndex<  1, 0 >()] -
-              u[ neighbourEntities.template getEntityIndex< -1, 0 >()] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1, 0 >()] -
+              u[ neighborEntities.template getEntityIndex< -1, 0 >()] ) * hxDiv;
          RealType u_y;
          if( entity.getCoordinates().y() > 0 )
             if( entity.getCoordinates().y() < entity.getMesh().getDimensions().y() - 1 )
                u_y = 0.25 *
-                  ( u[ neighbourEntities.template getEntityIndex<  1,  1 >() ] +
-                    u[ neighbourEntities.template getEntityIndex< -1,  1 >() ] -
-                    u[ neighbourEntities.template getEntityIndex<  1, -1 >() ] -
-                    u[ neighbourEntities.template getEntityIndex< -1, -1 >() ] ) * hyDiv;
+                  ( u[ neighborEntities.template getEntityIndex<  1,  1 >() ] +
+                    u[ neighborEntities.template getEntityIndex< -1,  1 >() ] -
+                    u[ neighborEntities.template getEntityIndex<  1, -1 >() ] -
+                    u[ neighborEntities.template getEntityIndex< -1, -1 >() ] ) * hyDiv;
             else // if( entity.getCoordinates().y() < entity.getMesh().getDimensions().y() - 1 )
                u_y = 0.5 *
-                  ( u[ neighbourEntities.template getEntityIndex<  1,  0 >() ] +
-                    u[ neighbourEntities.template getEntityIndex< -1,  0 >() ] -
-                    u[ neighbourEntities.template getEntityIndex<  1, -1 >() ] -
-                    u[ neighbourEntities.template getEntityIndex< -1, -1 >() ] ) * hyDiv;
+                  ( u[ neighborEntities.template getEntityIndex<  1,  0 >() ] +
+                    u[ neighborEntities.template getEntityIndex< -1,  0 >() ] -
+                    u[ neighborEntities.template getEntityIndex<  1, -1 >() ] -
+                    u[ neighborEntities.template getEntityIndex< -1, -1 >() ] ) * hyDiv;
          else // if( entity.getCoordinates().y() > 0 )
             u_y = 0.5 *
-               ( u[ neighbourEntities.template getEntityIndex<  1,  1 >() ] +
-                 u[ neighbourEntities.template getEntityIndex< -1,  1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1,  0 >() ] ) * hyDiv;
+               ( u[ neighborEntities.template getEntityIndex<  1,  1 >() ] +
+                 u[ neighborEntities.template getEntityIndex< -1,  1 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1,  0 >() ] ) * hyDiv;
          return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y );
@@ -219,28 +219,28 @@ class CoFVMGradientNorm< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real
          if( entity.getCoordinates().x() < entity.getMesh().getDimensions().x() - 1 )
             u_x = 0.25 *
-            ( u[ neighbourEntities.template getEntityIndex<  1,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  1, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1, -1 >() ] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  1, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1, -1 >() ] ) * hxDiv;
          else // if( entity.getCoordinates().x() < entity.getMesh().getDimensions().x() - 1 )
             u_x = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  0,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  0, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1, -1 >() ] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  0,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  0, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1, -1 >() ] ) * hxDiv;
       else // if( entity.getCoordinates().x() > 0 )
          u_x = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  1,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  1, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1 >() ] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  1, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1 >() ] ) * hxDiv;
       const RealType u_y =
-         ( u[ neighbourEntities.template getEntityIndex< 0,  1 >()] -
-           u[ neighbourEntities.template getEntityIndex< 0, -1 >()] ) * hyDiv;
+         ( u[ neighborEntities.template getEntityIndex< 0,  1 >()] -
+           u[ neighborEntities.template getEntityIndex< 0, -1 >()] ) * hyDiv;
       return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y );
@@ -296,43 +296,43 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real
          "The mesh function u must be stored on mesh cells.." );
       static_assert( MeshEntity::getMeshDimension() == 2,
          "The complementary finite volume gradient norm may be evaluated only on faces." );
-      const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.template getNeighbourEntities< 3 >();
+      const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.template getNeighborEntities< 3 >();
       const RealType& hxDiv = entity.getMesh().template getSpaceStepsProducts< -1,  0,  0 >();
       const RealType& hyDiv = entity.getMesh().template getSpaceStepsProducts<  0, -1,  0 >();
       const RealType& hzDiv = entity.getMesh().template getSpaceStepsProducts<  0,  0, -1 >();
       if( entity.getOrientation().x() != 0.0 )
          const RealType u_x =
-            ( u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >()] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >()] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  0,  0 >()] -
+              u[ neighborEntities.template getEntityIndex< -1,  0,  0 >()] ) * hxDiv;
          RealType u_y;
          if( entity.getCoordinates().y() > 0 )
             if( entity.getCoordinates().y() < entity.getMesh().getDimensions().y() - 1 )
                u_y = 0.25 *
-               ( u[ neighbourEntities.template getEntityIndex<  1,  1,  0 >() ] +
-                 u[ neighbourEntities.template getEntityIndex< -1,  1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  1, -1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hyDiv;
+               ( u[ neighborEntities.template getEntityIndex<  1,  1,  0 >() ] +
+                 u[ neighborEntities.template getEntityIndex< -1,  1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  1, -1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hyDiv;
             else // if( entity.getCoordinates().y() < entity.getMesh().getDimensions().y() - 1 )
                u_y = 0.5 *
-               ( u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] +
-                 u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  1, -1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hyDiv;
+               ( u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] +
+                 u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  1, -1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hyDiv;
          else // if( entity.getCoordinates().y() > 0 )
             u_y = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  1,  1,  0 >() ] +
-              u[ neighbourEntities.template getEntityIndex< -1,  1,  0 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ] ) * hyDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  1,  0 >() ] +
+              u[ neighborEntities.template getEntityIndex< -1,  1,  0 >() ] -
+              u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ] ) * hyDiv;
          RealType u_z;
@@ -341,27 +341,27 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real
             if( entity.getCoordinates().z() < entity.getMesh().getDimensions().z() - 1 )
                u_z = 0.25 *
-               ( u[ neighbourEntities.template getEntityIndex<  1,  0,  1 >() ] +
-                 u[ neighbourEntities.template getEntityIndex< -1,  0,  1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  1,  0, -1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hzDiv;
+               ( u[ neighborEntities.template getEntityIndex<  1,  0,  1 >() ] +
+                 u[ neighborEntities.template getEntityIndex< -1,  0,  1 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  1,  0, -1 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hzDiv;
             else //if( entity.getCoordinates().z() < entity.getMesh().getDimensions().z() - 1 )
                u_z = 0.5 *
-               ( u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] +
-                 u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  1,  0, -1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hzDiv;
+               ( u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] +
+                 u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  1,  0, -1 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hzDiv;
          else //if( entity.getCoordinates().z() > 0 )
             u_z = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  1,  0,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex< -1,  0,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ] ) * hzDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  0,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex< -1,  0,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ] ) * hzDiv;
          return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y + u_z * u_z );
@@ -373,58 +373,58 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real
             if( entity.getCoordinates().x() < entity.getMesh().getDimensions().x() - 1 )
                u_x = 0.25 *
-               ( u[ neighbourEntities.template getEntityIndex<  1,  1,  0 >() ] +
-                 u[ neighbourEntities.template getEntityIndex<  1, -1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1,  1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hxDiv;
+               ( u[ neighborEntities.template getEntityIndex<  1,  1,  0 >() ] +
+                 u[ neighborEntities.template getEntityIndex<  1, -1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1,  1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hxDiv;
             else // if( entity.getCoordinates().x() < entity.getMesh().getDimensions().x() - 1 )
                u_x = 0.5 *
-               ( u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] +
-                 u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1,  1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hxDiv;
+               ( u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] +
+                 u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1,  1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex< -1, -1,  0 >() ] ) * hxDiv;
          else // if( entity.getCoordinates().x() > 0 )
             u_x = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  1,  1,  0 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  1, -1,  0 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  1,  0 >() ] +
+              u[ neighborEntities.template getEntityIndex<  1, -1,  0 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ] ) * hxDiv;
          const RealType u_y =
-            ( u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >()] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >()] ) * hyDiv;
+            ( u[ neighborEntities.template getEntityIndex<  0,  1,  0 >()] -
+              u[ neighborEntities.template getEntityIndex<  0, -1,  0 >()] ) * hyDiv;
          RealType u_z;
          if( entity.getCoordinates().z() > 0 )
             if( entity.getCoordinates().z() < entity.getMesh().getDimensions().z() - 1 )
                u_z = 0.25 *
-               ( u[ neighbourEntities.template getEntityIndex<  0,  1,  1 >() ] +
-                 u[ neighbourEntities.template getEntityIndex<  0, -1,  1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  0,  1, -1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hzDiv;
+               ( u[ neighborEntities.template getEntityIndex<  0,  1,  1 >() ] +
+                 u[ neighborEntities.template getEntityIndex<  0, -1,  1 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  0,  1, -1 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hzDiv;
             else // if( entity.getCoordinates().z() < entity.getMesh().getDimensions().z() - 1 )
                u_z = 0.5 *
-               ( u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] +
-                 u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  0,  1, -1 >() ] -
-                 u[ neighbourEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hzDiv;
+               ( u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] +
+                 u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  0,  1, -1 >() ] -
+                 u[ neighborEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hzDiv;
          else // if( entity.getCoordinates().z() > 0 )
             u_z = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  0,  1,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  0, -1,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ] ) * hzDiv;
+            ( u[ neighborEntities.template getEntityIndex<  0,  1,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  0, -1,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ] ) * hzDiv;
          return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y + u_z * u_z );
@@ -434,28 +434,28 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real
          if( entity.getCoordinates().x() < entity.getMesh().getDimensions().x() - 1 )
             u_x = 0.25 *
-            ( u[ neighbourEntities.template getEntityIndex<  1,  0,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  1,  0, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  1,  0,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  1,  0, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  0,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hxDiv;
          else // if( entity.getCoordinates().x() < entity.getMesh().getDimensions().x() - 1 )
             u_x = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hxDiv;
+            ( u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  0,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex< -1,  0, -1 >() ] ) * hxDiv;
       else // if( entity.getCoordinates().x() > 0 )
          u_x = 0.5 *
-         ( u[ neighbourEntities.template getEntityIndex<  1,  0,  1 >() ] +
-           u[ neighbourEntities.template getEntityIndex<  1,  0, -1 >() ] -
-           u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] -
-           u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ] ) * hxDiv;
+         ( u[ neighborEntities.template getEntityIndex<  1,  0,  1 >() ] +
+           u[ neighborEntities.template getEntityIndex<  1,  0, -1 >() ] -
+           u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] -
+           u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ] ) * hxDiv;
       RealType u_y;
       if( entity.getCoordinates().y() > 0 )
@@ -463,31 +463,31 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real
          if( entity.getCoordinates().y() < entity.getMesh().getDimensions().y() - 1 )
             u_y = 0.25 *
-            ( u[ neighbourEntities.template getEntityIndex<  0,  1,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  0,  1, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hyDiv;
+            ( u[ neighborEntities.template getEntityIndex<  0,  1,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  0,  1, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hyDiv;
          else //if( entity.getCoordinates().y() < entity.getMesh().getDimensions().y() - 1 )
             u_y = 0.5 *
-            ( u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] +
-              u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1,  1 >() ] -
-              u[ neighbourEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hyDiv;
+            ( u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] +
+              u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1,  1 >() ] -
+              u[ neighborEntities.template getEntityIndex<  0, -1, -1 >() ] ) * hyDiv;
       else //if( entity.getCoordinates().y() > 0 )
          u_y = 0.5 *
-         ( u[ neighbourEntities.template getEntityIndex<  0,  1,  1 >() ] +
-           u[ neighbourEntities.template getEntityIndex<  0,  1, -1 >() ] -
-           u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] -
-           u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ] ) * hyDiv;
+         ( u[ neighborEntities.template getEntityIndex<  0,  1,  1 >() ] +
+           u[ neighborEntities.template getEntityIndex<  0,  1, -1 >() ] -
+           u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] -
+           u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ] ) * hyDiv;
       const RealType u_z =
-         ( u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >()] -
-           u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >()] ) * hzDiv;
+         ( u[ neighborEntities.template getEntityIndex<  0,  0,  1 >()] -
+           u[ neighborEntities.template getEntityIndex<  0,  0, -1 >()] ) * hzDiv;
       return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y + u_z * u_z );
diff --git a/src/TNL/Operators/geometric/ExactGradientNorm.h b/src/TNL/Operators/geometric/ExactGradientNorm.h
index 48d67a12a8392f3838a5bbc19751325e0e651528..cf7e3384820be1c2e8628fe655211037c6dde1dd 100644
--- a/src/TNL/Operators/geometric/ExactGradientNorm.h
+++ b/src/TNL/Operators/geometric/ExactGradientNorm.h
@@ -10,8 +10,6 @@
 #pragma once
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/SharedVector.h>
 #include <TNL/Meshes/Grid.h>
 #include <TNL/Functions/Domain.h>
diff --git a/src/TNL/Operators/geometric/FDMGradientNorm.h b/src/TNL/Operators/geometric/FDMGradientNorm.h
index d574b90123835169e4e390d7a93c04f758f114ad..a5eb4536317a0ff5258a681585f67557ff029d59 100644
--- a/src/TNL/Operators/geometric/FDMGradientNorm.h
+++ b/src/TNL/Operators/geometric/FDMGradientNorm.h
@@ -20,7 +20,7 @@ namespace Operators {
 template< typename Mesh,
           template< typename, int, int, int, typename, typename > class DifferenceOperatorTemplate = ForwardFiniteDifference,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class FDMGradientNorm
diff --git a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h
index 9d78daa5da338ec2e435ec0afc8656e819a3920d..2d86167b1c1ee3a9466635c2605931248d80eb56 100644
--- a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h
+++ b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h
@@ -20,7 +20,7 @@ namespace Operators {
 template< typename Mesh,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class TwoSidedGradientNorm
diff --git a/src/TNL/Operators/interpolants/MeshEntitiesInterpolants.h b/src/TNL/Operators/interpolants/MeshEntitiesInterpolants.h
index bb171716806a69daca7a81f7c83ab321c00f6e40..ca9381d1f418e1a033d39c4619678b94856afc63 100644
--- a/src/TNL/Operators/interpolants/MeshEntitiesInterpolants.h
+++ b/src/TNL/Operators/interpolants/MeshEntitiesInterpolants.h
@@ -48,10 +48,10 @@ class MeshEntitiesInterpolants< Meshes::Grid< 1, Real, Device, Index >, 1, 0 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();
-         return 0.5 * ( u[ neighbourEntities.template getEntityIndex< -1 >() ] +
-                        u[ neighbourEntities.template getEntityIndex<  1 >() ] );
+         return 0.5 * ( u[ neighborEntities.template getEntityIndex< -1 >() ] +
+                        u[ neighborEntities.template getEntityIndex<  1 >() ] );
@@ -80,10 +80,10 @@ class MeshEntitiesInterpolants< Meshes::Grid< 1, Real, Device, Index >, 0, 1 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 0 >& neighbourEntities = entity.template getNeighbourEntities< 0 >();
+         const typename MeshEntity::template NeighborEntities< 0 >& neighborEntities = entity.template getNeighborEntities< 0 >();
-         return 0.5 * ( u[ neighbourEntities.template getEntityIndex< -1 >() ] +
-                        u[ neighbourEntities.template getEntityIndex<  1 >() ] );
+         return 0.5 * ( u[ neighborEntities.template getEntityIndex< -1 >() ] +
+                        u[ neighborEntities.template getEntityIndex<  1 >() ] );
@@ -112,14 +112,14 @@ class MeshEntitiesInterpolants< Meshes::Grid< 2, Real, Device, Index >, 2, 1 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
          if( entity.getOrientation().x() == 1.0 )
-            return 0.5 * ( u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] +
-                           u[ neighbourEntities.template getEntityIndex<  1, 0 >() ] );
+            return 0.5 * ( u[ neighborEntities.template getEntityIndex< -1, 0 >() ] +
+                           u[ neighborEntities.template getEntityIndex<  1, 0 >() ] );
-            return 0.5 * ( u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] +
-                           u[ neighbourEntities.template getEntityIndex< 0,  1 >() ] );
+            return 0.5 * ( u[ neighborEntities.template getEntityIndex< 0, -1 >() ] +
+                           u[ neighborEntities.template getEntityIndex< 0,  1 >() ] );
@@ -148,12 +148,12 @@ class MeshEntitiesInterpolants< Meshes::Grid< 2, Real, Device, Index >, 2, 0 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
-         return 0.25 * ( u[ neighbourEntities.template getEntityIndex< -1,  1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  1,  1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex< -1, -1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  1, -1 >() ] );
+         return 0.25 * ( u[ neighborEntities.template getEntityIndex< -1,  1 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  1,  1 >() ] +
+                         u[ neighborEntities.template getEntityIndex< -1, -1 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  1, -1 >() ] );
@@ -182,12 +182,12 @@ class MeshEntitiesInterpolants< Meshes::Grid< 2, Real, Device, Index >, 1, 2 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.template getNeighbourEntities< 1 >();
+         const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.template getNeighborEntities< 1 >();
-         return 0.25 * ( u[ neighbourEntities.template getEntityIndex< -1,  0 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  1,  0 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  0,  1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  0, -1 >() ] );
+         return 0.25 * ( u[ neighborEntities.template getEntityIndex< -1,  0 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  1,  0 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  0,  1 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  0, -1 >() ] );
@@ -216,12 +216,12 @@ class MeshEntitiesInterpolants< Meshes::Grid< 2, Real, Device, Index >, 0, 2 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 0 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 0 >& neighborEntities = entity.getNeighborEntities();
-         return 0.25 * ( u[ neighbourEntities.template getEntityIndex< -1,  1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  1,  1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex< -1, -1 >() ] +
-                         u[ neighbourEntities.template getEntityIndex<  1, -1 >() ] );
+         return 0.25 * ( u[ neighborEntities.template getEntityIndex< -1,  1 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  1,  1 >() ] +
+                         u[ neighborEntities.template getEntityIndex< -1, -1 >() ] +
+                         u[ neighborEntities.template getEntityIndex<  1, -1 >() ] );
@@ -250,17 +250,17 @@ class MeshEntitiesInterpolants< Meshes::Grid< 3, Real, Device, Index >, 3, 2 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+         const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
          if( entity.getOrientation().x() == 1.0 )
-            return 0.5 * ( u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ] +
-                           u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] );
+            return 0.5 * ( u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ] +
+                           u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] );
          if( entity.getOrientation().y() == 1.0 )
-            return 0.5 * ( u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ] +
-                           u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] );
+            return 0.5 * ( u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ] +
+                           u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] );
-            return 0.5 * ( u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ] +
-                           u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] );
+            return 0.5 * ( u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ] +
+                           u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] );
@@ -289,14 +289,14 @@ class MeshEntitiesInterpolants< Meshes::Grid< 3, Real, Device, Index >, 2, 3 >
          static_assert( std::is_same< typename MeshEntity::MeshType, MeshType >::value,
             "The mesh entity belongs to other mesh type then the interpolants." );
-         const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.template getNeighbourEntities< 2 >();
+         const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.template getNeighborEntities< 2 >();
-         return 1.0 / 6.0 * ( u[ neighbourEntities.template getEntityIndex< -1,  0,  0 >() ] +
-                              u[ neighbourEntities.template getEntityIndex<  1,  0,  0 >() ] +
-                              u[ neighbourEntities.template getEntityIndex<  0, -1,  0 >() ] +
-                              u[ neighbourEntities.template getEntityIndex<  0,  1,  0 >() ] +
-                              u[ neighbourEntities.template getEntityIndex<  0,  0, -1 >() ] +
-                              u[ neighbourEntities.template getEntityIndex<  0,  0,  1 >() ] );
+         return 1.0 / 6.0 * ( u[ neighborEntities.template getEntityIndex< -1,  0,  0 >() ] +
+                              u[ neighborEntities.template getEntityIndex<  1,  0,  0 >() ] +
+                              u[ neighborEntities.template getEntityIndex<  0, -1,  0 >() ] +
+                              u[ neighborEntities.template getEntityIndex<  0,  1,  0 >() ] +
+                              u[ neighborEntities.template getEntityIndex<  0,  0, -1 >() ] +
+                              u[ neighborEntities.template getEntityIndex<  0,  0,  1 >() ] );
diff --git a/src/TNL/Operators/operator-Q/CMakeLists.txt b/src/TNL/Operators/operator-Q/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h
index 5cf59f1766f8accd727b173dd1dc134bd166f06b..7f145198f049c2062e0e53d33b215e0fe8c0a3b8 100644
--- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h
+++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h
@@ -19,7 +19,7 @@ namespace Operators {
 template< typename Mesh,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType,
+          typename Index = typename Mesh::GlobalIndexType,
           int Precomputation = 0 > 
 class tnlFiniteVolumeOperatorQ
diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h
index e27927f97d0c833529c705a15b6e5e0ba845d257..0fae70006b3b9f69c8157cfcf2ad5538489b8f1d 100644
--- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h
+++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h
@@ -270,37 +270,37 @@ boundaryDerivative(
    const IndexType& dy,
    const IndexType& dz ) const
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();      
    const IndexType& cellIndex = entity.getIndex();
     if ( ( AxeX == 1 ) && ( AxeY == 0 ) && ( AxeZ == 0 ) )
         if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0 >() * ( u[ neighbourEntities.template getEntityIndex< 1,0 >() ] - u[ cellIndex ] );
+            return mesh.template getSpaceStepsProducts< -1, 0 >() * ( u[ neighborEntities.template getEntityIndex< 1,0 >() ] - u[ cellIndex ] );
         if ( ( dx == -1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0 >() * ( u[ cellIndex ] - u[ neighbourEntities.template getEntityIndex< -1,0 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0 >() * ( u[ cellIndex ] - u[ neighborEntities.template getEntityIndex< -1,0 >() ] );
         if ( ( dx == 0 ) && ( dy == 1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 1,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,1 >() ] - u[ neighbourEntities.template getEntityIndex< -1,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,1 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 1,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,1 >() ] - u[ neighborEntities.template getEntityIndex< -1,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,1 >() ] );
         if ( ( dx == 0 ) && ( dy == -1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 1,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,-1 >() ] - u[ neighbourEntities.template getEntityIndex< -1,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 1,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,-1 >() ] - u[ neighborEntities.template getEntityIndex< -1,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,-1 >() ] );
     if ( ( AxeX == 0 ) && ( AxeY == 1 ) && ( AxeZ == 0 ) )
         if ( ( dx == 0 ) && ( dy == 1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1 >() * ( u[ neighbourEntities.template getEntityIndex< 0,1 >() ] - u[ cellIndex ] );
+            return mesh.template getSpaceStepsProducts< 0, -1 >() * ( u[ neighborEntities.template getEntityIndex< 0,1 >() ] - u[ cellIndex ] );
         if ( ( dx == 0 ) && ( dy == -1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1 >() * ( u[ cellIndex ] - u[ neighbourEntities.template getEntityIndex< 0,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1 >() * ( u[ cellIndex ] - u[ neighborEntities.template getEntityIndex< 0,-1 >() ] );
         if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,1 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,-1 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 1,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,1 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,-1 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 1,-1 >() ] );
         if ( ( dx == -1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,1 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< -1,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,-1 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,1 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< -1,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,-1 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,-1 >() ] );
     return 0.0;
@@ -321,14 +321,14 @@ operator()( const MeshEntity& entity,
           const IndexType& dy,
           const IndexType& dz ) const
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const IndexType& cellIndex = entity.getIndex();
     if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == 0 ) )
-        return ::sqrt( this->eps + ( u[ neighbourEntities.template getEntityIndex< 0,1 >() ] - u[ cellIndex ] ) * 
-                ( u[ neighbourEntities.template getEntityIndex< 0,1 >() ] - u[ cellIndex ] )
-                * mesh.template getSpaceStepsProducts< 0, -1 >() * mesh.template getSpaceStepsProducts< 0, -1 >() + ( u[ neighbourEntities.template getEntityIndex< 1,0 >() ] - u[ cellIndex ] ) 
-                * ( u[ neighbourEntities.template getEntityIndex< 1,0 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -1, 0 >() * mesh.template getSpaceStepsProducts< -1, 0 >() );
+        return ::sqrt( this->eps + ( u[ neighborEntities.template getEntityIndex< 0,1 >() ] - u[ cellIndex ] ) * 
+                ( u[ neighborEntities.template getEntityIndex< 0,1 >() ] - u[ cellIndex ] )
+                * mesh.template getSpaceStepsProducts< 0, -1 >() * mesh.template getSpaceStepsProducts< 0, -1 >() + ( u[ neighborEntities.template getEntityIndex< 1,0 >() ] - u[ cellIndex ] ) 
+                * ( u[ neighborEntities.template getEntityIndex< 1,0 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -1, 0 >() * mesh.template getSpaceStepsProducts< -1, 0 >() );
     if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
         return ::sqrt( this->eps + this->template boundaryDerivative< MeshEntity, Vector,1,0 >( mesh, entity, u, time, 1, 0 ) * 
                this->template boundaryDerivative< MeshEntity, Vector,1,0 >( mesh, entity, u, time, 1, 0 ) + 
@@ -437,8 +437,7 @@ tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real,
 bind( Vector& u) 
-    if(q.setSize(u.getSize()))
-        return 1;
+    q.setSize(u.getSize());
     return 0;
@@ -480,76 +479,76 @@ boundaryDerivative(
    const IndexType& dy,
    const IndexType& dz ) const
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();
    const IndexType& cellIndex = entity.getIndex();    
     if ( ( AxeX == 1 ) && ( AxeY == 0 ) && ( AxeZ == 0 ) )
         if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ] );
+            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ] );
         if ( ( dx == -1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * ( u[ cellIndex ] - u[ neighbourEntities.template getEntityIndex< -1,0,0 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * ( u[ cellIndex ] - u[ neighborEntities.template getEntityIndex< -1,0,0 >() ] );
         if ( ( dx == 0 ) && ( dy == 1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,1,0 >() ] - u[ neighbourEntities.template getEntityIndex< -1,0,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,1,0 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,1,0 >() ] - u[ neighborEntities.template getEntityIndex< -1,0,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,1,0 >() ] );
         if ( ( dx == 0 ) && ( dy == -1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,-1,0 >() ] - u[ neighbourEntities.template getEntityIndex< -1,0,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,-1,0 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,-1,0 >() ] - u[ neighborEntities.template getEntityIndex< -1,0,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,-1,0 >() ] );
         if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == 1 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,0,1 >() ] - u[ neighbourEntities.template getEntityIndex< -1,0,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,0,1 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,0,1 >() ] - u[ neighborEntities.template getEntityIndex< -1,0,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,0,1 >() ] );
         if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == -1 ) )
-            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,0,-1 >() ] - u[ neighbourEntities.template getEntityIndex< -1,0,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,0,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< -1, 0, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,0,-1 >() ] - u[ neighborEntities.template getEntityIndex< -1,0,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,0,-1 >() ] );
     if ( ( AxeX == 0 ) && ( AxeY == 1 ) && ( AxeZ == 0 ) )
         if ( ( dx == 0 ) && ( dy == 1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ] );
+            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ] );
         if ( ( dx == 0 ) && ( dy == -1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * ( u[ cellIndex ] - u[ neighbourEntities.template getEntityIndex< 0,-1,0 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * ( u[ cellIndex ] - u[ neighborEntities.template getEntityIndex< 0,-1,0 >() ] );
         if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,1,0 >() ] - u[ neighbourEntities.template getEntityIndex< 0,-1,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 1,-1,0 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,1,0 >() ] - u[ neighborEntities.template getEntityIndex< 0,-1,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 1,-1,0 >() ] );
         if ( ( dx == -1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< -1,1,0 >() ] - u[ neighbourEntities.template getEntityIndex< 0,-1,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,-1,0 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< -1,1,0 >() ] - u[ neighborEntities.template getEntityIndex< 0,-1,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,-1,0 >() ] );
         if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == 1 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 0,1,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,-1,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 0,-1,1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 0,1,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,-1,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 0,-1,1 >() ] );
         if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == -1 ) )
-            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 0,1,-1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,-1,0 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 0,-1,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, -1, 0 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 0,1,-1 >() ] - u[ neighborEntities.template getEntityIndex< 0,-1,0 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 0,-1,-1 >() ] );
     if ( ( AxeX == 0 ) && ( AxeY == 0 ) && ( AxeZ == 1 ) )
         if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == 1 ) )
-            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ] );
+            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ] );
         if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == -1 ) )
-            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * ( u[ cellIndex ] - u[ neighbourEntities.template getEntityIndex< 0,0,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * ( u[ cellIndex ] - u[ neighborEntities.template getEntityIndex< 0,0,-1 >() ] );
         if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 1,0,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,0,-1 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 1,0,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 1,0,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,0,-1 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 1,0,-1 >() ] );
         if ( ( dx == -1 ) && ( dy == 0 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< -1,0,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,0,-1 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< -1,0,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< -1,0,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,0,-1 >() ] -
+                   u[ neighborEntities.template getEntityIndex< -1,0,-1 >() ] );
         if ( ( dx == 0 ) && ( dy == 1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 0,1,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,0,-1 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 0,1,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 0,1,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,0,-1 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 0,1,-1 >() ] );
         if ( ( dx == 0 ) && ( dy == -1 ) && ( dz == 0 ) )
-            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] + 
-                   u[ neighbourEntities.template getEntityIndex< 0,-1,1 >() ] - u[ neighbourEntities.template getEntityIndex< 0,0,-1 >() ] -
-                   u[ neighbourEntities.template getEntityIndex< 0,-1,-1 >() ] );
+            return mesh.template getSpaceStepsProducts< 0, 0, -1 >() * 0.25 * ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] + 
+                   u[ neighborEntities.template getEntityIndex< 0,-1,1 >() ] - u[ neighborEntities.template getEntityIndex< 0,0,-1 >() ] -
+                   u[ neighborEntities.template getEntityIndex< 0,-1,-1 >() ] );
     return 0.0;
@@ -571,16 +570,16 @@ operator()(
    const IndexType& dy,
    const IndexType& dz ) const
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const IndexType& cellIndex = entity.getIndex();     
     if ( ( dx == 0 ) && ( dy == 0 ) && ( dz == 0 ) )
-        return ::sqrt( this->eps + ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ] ) * 
-                ( u[ neighbourEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ] )
-                * mesh.template getSpaceStepsProducts< 0, -1, 0 >() * mesh.template getSpaceStepsProducts< 0, -1, 0 >() + ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ] ) 
-                * ( u[ neighbourEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -1, 0, 0 >() * mesh.template getSpaceStepsProducts< -1, 0, 0 >()
-                + ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ] ) 
-                * ( u[ neighbourEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< 0, 0, -1 >() * mesh.template getSpaceStepsProducts< 0, 0, -1 >() );
+        return ::sqrt( this->eps + ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ] ) * 
+                ( u[ neighborEntities.template getEntityIndex< 0,1,0 >() ] - u[ cellIndex ] )
+                * mesh.template getSpaceStepsProducts< 0, -1, 0 >() * mesh.template getSpaceStepsProducts< 0, -1, 0 >() + ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ] ) 
+                * ( u[ neighborEntities.template getEntityIndex< 1,0,0 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< -1, 0, 0 >() * mesh.template getSpaceStepsProducts< -1, 0, 0 >()
+                + ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ] ) 
+                * ( u[ neighborEntities.template getEntityIndex< 0,0,1 >() ] - u[ cellIndex ] ) * mesh.template getSpaceStepsProducts< 0, 0, -1 >() * mesh.template getSpaceStepsProducts< 0, 0, -1 >() );
     if ( ( dx == 1 ) && ( dy == 0 ) && ( dz == 0 ) )
         return ::sqrt( this->eps + this->template boundaryDerivative< MeshEntity, Vector,1,0,0 >( mesh, entity, u, time, 1, 0, 0 ) * 
                this->template boundaryDerivative< MeshEntity, Vector,1,0,0 >( mesh, entity, u, time, 1, 0, 0 ) + 
diff --git a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h
index 29b2ac29091f8490adba7dc91c9b6075aea026b3..a96d22f5134029fb9686150713696da47ff05bfc 100644
--- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h
+++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h
@@ -10,8 +10,6 @@
 #pragma once
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/SharedVector.h>
 #include <TNL/Meshes/Grid.h>
 namespace TNL {
@@ -19,7 +17,7 @@ namespace Operators {
 template< typename Mesh,
           typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType > 
+          typename Index = typename Mesh::GlobalIndexType > 
 class tnlOneSideDiffOperatorQ
diff --git a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h
index 483680f5a174b11c917fa34b22bc8fdbc47cb788..21f5e44f08ec29fe365de11cfcbb5fb898f9af26 100644
--- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h
+++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h
@@ -58,9 +58,9 @@ operator()( const MeshFunction& u,
             const Real& time ) const
    const IndexType& cellIndex = entity.getIndex();
-   const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
-   const RealType& u_x = ( u[ neighbourEntities.template getEntityIndex< 1 >() ] - u[ cellIndex ] ) *
+   const RealType& u_x = ( u[ neighborEntities.template getEntityIndex< 1 >() ] - u[ cellIndex ] ) *
                          mesh.template getSpaceStepsProducts< -1 >();
    return ::sqrt( this->epsSquare + u_x * u_x );          
@@ -79,12 +79,12 @@ getValueStriped( const MeshFunction& u,
                  const Real& time ) const
    const IndexType& cellIndex = entity.getIndex();
-   const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const RealType& u_c = u[ cellIndex ];
-   const RealType& u_x_f = ( u[ neighbourEntities.template getEntityIndex< 1 >() ] - u_c ) * 
+   const RealType& u_x_f = ( u[ neighborEntities.template getEntityIndex< 1 >() ] - u_c ) * 
                            mesh.template getSpaceStepsProducts< -1 >();
-   const RealType& u_x_b = ( u_c - u[ neighbourEntities.template getEntityIndex< -1 >() ] ) * 
+   const RealType& u_x_b = ( u_c - u[ neighborEntities.template getEntityIndex< -1 >() ] ) * 
                            mesh.template getSpaceStepsProducts< -1 >();   
    return ::sqrt( this->epsSquare + 0.5 * ( u_x_f * u_x_f + u_x_b * u_x_b ) );
@@ -134,12 +134,12 @@ operator()( const MeshFunction& u,
             const Real& time ) const
    const IndexType& cellIndex = entity.getIndex();
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const RealType& u_c = u[ cellIndex ];
-   const RealType u_x = ( u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] - u_c ) *
+   const RealType u_x = ( u[ neighborEntities.template getEntityIndex< 1, 0 >() ] - u_c ) *
                          mesh.template getSpaceStepsProducts< -1, 0 >();
-   const RealType u_y = ( u[ neighbourEntities.template getEntityIndex< 0, 1 >() ] - u_c ) *
+   const RealType u_y = ( u[ neighborEntities.template getEntityIndex< 0, 1 >() ] - u_c ) *
                          mesh.template getSpaceStepsProducts< 0, -1 >();
    return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y ); 
@@ -158,16 +158,16 @@ getValueStriped( const MeshFunction& u,
                  const Real& time ) const
    const IndexType& cellIndex = entity.getIndex();
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const RealType& u_c = u[ cellIndex ];
-   const RealType u_x_f = ( u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] - u_c ) *
+   const RealType u_x_f = ( u[ neighborEntities.template getEntityIndex< 1, 0 >() ] - u_c ) *
                           mesh.template getSpaceStepsProducts< -1, 0 >();
-   const RealType u_y_f = ( u[ neighbourEntities.template getEntityIndex< 0, 1 >() ] - u_c ) *
+   const RealType u_y_f = ( u[ neighborEntities.template getEntityIndex< 0, 1 >() ] - u_c ) *
                           mesh.template getSpaceStepsProducts< 0, -1 >();
-   const RealType u_x_b = ( u_c - u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] ) *
+   const RealType u_x_b = ( u_c - u[ neighborEntities.template getEntityIndex< -1, 0 >() ] ) *
                           mesh.template getSpaceStepsProducts< -1, 0 >();
-   const RealType u_y_b = ( u_c - u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] ) *
+   const RealType u_y_b = ( u_c - u[ neighborEntities.template getEntityIndex< 0, -1 >() ] ) *
                           mesh.template getSpaceStepsProducts< 0, -1 >();
    return ::sqrt( this->epsSquare + 
@@ -219,15 +219,15 @@ operator()( const MeshFunction& u,
             const Real& time ) const
    const IndexType& cellIndex = entity.getIndex();
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const RealType& u_c =u[ cellIndex ];
-   const RealType u_x = ( u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] - u_c ) *
+   const RealType u_x = ( u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] - u_c ) *
                          mesh.template getSpaceStepsProducts< -1, 0, 0 >();
-   const RealType u_y = ( u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] - u_c ) *
+   const RealType u_y = ( u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] - u_c ) *
                          mesh.template getSpaceStepsProducts< 0, -1, 0 >();
-   const RealType u_z = ( u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] - u_c ) *
+   const RealType u_z = ( u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] - u_c ) *
                          mesh.template getSpaceStepsProducts< 0, 0, -1 >();
    return ::sqrt( this->epsSquare + u_x * u_x + u_y * u_y + u_z * u_z ); 
@@ -246,21 +246,21 @@ getValueStriped( const MeshFunction& u,
                  const Real& time ) const
    const IndexType& cellIndex = entity.getIndex();
-   const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities();      
+   const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities();      
    const typename MeshEntity::MeshType& mesh = entity.getMesh();
    const RealType& u_c = u[ cellIndex ];
-   const RealType u_x_f = ( u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] - u_c ) *
+   const RealType u_x_f = ( u[ neighborEntities.template getEntityIndex< 1, 0, 0 >() ] - u_c ) *
                           mesh.template getSpaceStepsProducts< -1, 0, 0 >();
-   const RealType u_y_f = ( u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] - u_c ) *
+   const RealType u_y_f = ( u[ neighborEntities.template getEntityIndex< 0, 1, 0 >() ] - u_c ) *
                           mesh.template getSpaceStepsProducts< 0, -1, 0 >();
-   const RealType u_z_f = ( u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] - u_c ) *
+   const RealType u_z_f = ( u[ neighborEntities.template getEntityIndex< 0, 0, 1 >() ] - u_c ) *
                           mesh.template getSpaceStepsProducts< 0, 0, -1 >();   
-   const RealType u_x_b = ( u_c - u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] ) *
+   const RealType u_x_b = ( u_c - u[ neighborEntities.template getEntityIndex< -1, 0, 0 >() ] ) *
                           mesh.template getSpaceStepsProducts< -1, 0, 0 >();
-   const RealType u_y_b = ( u_c - u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] ) *
+   const RealType u_y_b = ( u_c - u[ neighborEntities.template getEntityIndex< 0, -1, 0 >() ] ) *
                           mesh.template getSpaceStepsProducts< 0, -1, 0 >();
-   const RealType u_z_b = ( u_c - u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] ) *
+   const RealType u_z_b = ( u_c - u[ neighborEntities.template getEntityIndex< 0, 0, -1 >() ] ) *
                           mesh.template getSpaceStepsProducts< 0, 0, -1 >();
    return ::sqrt( this->epsSquare + 
diff --git a/src/TNL/Operators/operator-curvature/CMakeLists.txt b/src/TNL/Operators/operator-curvature/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h
index 2c7d15fe99ab715a6a5ea8a98710f3cceddc29ad..33a20e255ce950bfa9714504185c1f236ad4abed 100644
--- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h
+++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h
@@ -31,14 +31,8 @@ class ExactOperatorCurvature< OperatorQ, 1 >
       static String getType();
-#ifdef HAVE_NOT_CXX11      
-      template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real >
       template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType >
-#ifdef HAVE_CUDA
-      __device__ __host__
+      __cuda_callable__
       static Real getValue( const Function& function,
                             const Point& v,
                             const Real& time = 0.0, const Real& eps = 1.0 );
@@ -54,14 +48,8 @@ class ExactOperatorCurvature< ExactOperatorQ, 2 >
       static String getType();
-#ifdef HAVE_NOT_CXX11      
-      template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real >
       template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType >
-#ifdef HAVE_CUDA
-      __device__ __host__
+      __cuda_callable__
       static Real getValue( const Function& function,
                             const Point& v,
                             const Real& time = 0.0, const Real& eps = 1.0 );
@@ -76,14 +64,8 @@ class ExactOperatorCurvature< ExactOperatorQ, 3 >
       static String getType();
-#ifdef HAVE_NOT_CXX11      
-      template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real >
       template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType >
-#ifdef HAVE_CUDA
-      __device__ __host__
+      __cuda_callable__
       static Real getValue( const Function& function,
                             const Point& v,
                             const Real& time = 0.0, const Real& eps = 1.0 )
diff --git a/src/TNL/ParallelFor.h b/src/TNL/ParallelFor.h
new file mode 100644
index 0000000000000000000000000000000000000000..b1e81ca97bf66576875aa1d0dcb34ecc0298d5df
--- /dev/null
+++ b/src/TNL/ParallelFor.h
@@ -0,0 +1,84 @@
+                          ParallelFor.h  -  description
+                             -------------------
+    begin                : Mar 10, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <TNL/Devices/Host.h>
+#include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/CudaDeviceInfo.h>
+ * The implementation of ParallelFor is not meant to provide maximum performance
+ * at every cost, but maximum flexibility for operating with data stored on the
+ * device.
+ *
+ * The grid-stride loop for CUDA has been inspired by Nvidia's blog post:
+ * https://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops/
+ *
+ * Implemented by: Jakub Klinkovsky
+ */
+namespace TNL {
+template< typename Device = Devices::Host >
+struct ParallelFor
+   template< typename Index,
+             typename Function,
+             typename... FunctionArgs >
+   static void exec( Index start, Index end, Function f, FunctionArgs... args )
+   {
+      #pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )
+      for( Index i = start; i < end; i++ )
+         f( i, args... );
+   }
+#ifdef HAVE_CUDA
+template< typename Index,
+          typename Function,
+          typename... FunctionArgs >
+__global__ void
+ParallelForKernel( Index start, Index end, Function f, FunctionArgs... args )
+   for( Index i = start + blockIdx.x * blockDim.x + threadIdx.x;
+        i < end;
+        i += blockDim.x * gridDim.x )
+   {
+      f( i, args... );
+   }
+struct ParallelFor< Devices::Cuda >
+   template< typename Index,
+             typename Function,
+             typename... FunctionArgs >
+   static void exec( Index start, Index end, Function f, FunctionArgs... args )
+   {
+#ifdef HAVE_CUDA
+      if( end > start ) {
+         dim3 blockSize( 256 );
+         dim3 gridSize;
+         const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() );
+         gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) );
+         Devices::Cuda::synchronizeDevice();
+         ParallelForKernel<<< gridSize, blockSize >>>( start, end, f, args... );
+      }
+   }
+} // namespace TNL
diff --git a/src/TNL/Problems/CMakeLists.txt b/src/TNL/Problems/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h
index 1d5e95c9b70af4b91ea96666670ba737734f5d4c..cb01045e6cd00aec1d91f19357cc555144f4848e 100644
--- a/src/TNL/Problems/HeatEquationProblem.h
+++ b/src/TNL/Problems/HeatEquationProblem.h
@@ -108,10 +108,15 @@ class HeatEquationProblem : public PDEProblem< Mesh,
                                  DofVectorPointer& rightHandSidePointer,
                                  MeshDependentDataPointer& meshDependentData );
+      template< typename Matrix >
+      void saveFailedLinearSystem( const Matrix& matrix,
+                                   const DofVectorType& dofs,
+                                   const DofVectorType& rightHandSide ) const;
          MeshFunctionPointer uPointer;
+         MeshFunctionPointer fuPointer;
          DifferentialOperatorPointer differentialOperatorPointer;
diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h
index 182e2fb0726249041fc839d7c842b601ac9be1b3..39255752baa2c81112a9db5238ef30fc2bec4c90 100644
--- a/src/TNL/Problems/HeatEquationProblem_impl.h
+++ b/src/TNL/Problems/HeatEquationProblem_impl.h
@@ -154,8 +154,7 @@ setupLinearSystem( const MeshPointer& meshPointer,
    const IndexType dofs = this->getDofs( meshPointer );
    typedef typename MatrixPointer::ObjectType::CompressedRowLengthsVector CompressedRowLengthsVectorType;
    SharedPointer< CompressedRowLengthsVectorType > rowLengthsPointer;
-   if( ! rowLengthsPointer->setSize( dofs ) )
-      return false;
+   rowLengthsPointer->setSize( dofs );
    Matrices::MatrixSetter< MeshType, DifferentialOperator, BoundaryCondition, CompressedRowLengthsVectorType > matrixSetter;
    matrixSetter.template getCompressedRowLengths< typename Mesh::Cell >(
@@ -163,8 +162,7 @@ setupLinearSystem( const MeshPointer& meshPointer,
       rowLengthsPointer );
    matrixPointer->setDimensions( dofs, dofs );
-   if( ! matrixPointer->setCompressedRowLengths( *rowLengthsPointer ) )
-      return false;
+   matrixPointer->setCompressedRowLengths( *rowLengthsPointer );
    return true;
    //return MultidiagonalMatrixSetter< Mesh >::setupMatrix( mesh, matrix );
@@ -252,5 +250,18 @@ assemblyLinearSystem( const RealType& time,
       bPointer );
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename DifferentialOperator >
+    template< typename Matrix >
+HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
+saveFailedLinearSystem( const Matrix& matrix,
+                        const DofVectorType& dofs,
+                        const DofVectorType& rightHandSide ) const
 } // namespace Problems
 } // namespace TNL
diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem.h b/src/TNL/Problems/MeanCurvatureFlowProblem.h
index 0e7a88df3312b5fde78ec4964b4984ed95500f8d..a06049647dcc0ef45ef7540fb3e9fd1ae39f479c 100644
--- a/src/TNL/Problems/MeanCurvatureFlowProblem.h
+++ b/src/TNL/Problems/MeanCurvatureFlowProblem.h
@@ -31,7 +31,7 @@ template< typename Mesh,
           typename DifferentialOperator =
             OneSidedMeanCurvature< Mesh,
                                       typename Mesh::RealType,
-                                      typename Mesh::IndexType,
+                                      typename Mesh::GlobalIndexType,
                                       false > >
 class MeanCurvatureFlowProblem : public PDEProblem< Mesh,
                                                      typename DifferentialOperator::RealType,
@@ -97,6 +97,10 @@ class MeanCurvatureFlowProblem : public PDEProblem< Mesh,
                                  DofVectorType& rightHandSide,
                                  MeshDependentDataPointer& meshDependentData );
+      template< typename Matrix >
+      void saveFailedLinearSystem( const Matrix& matrix,
+                                   const DofVectorType& dofs,
+                                   const DofVectorType& rightHandSide ) const;
diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h
index 79c730732fff1fea884d623076e08ce32dc3bf96..4dcba17fcd9ca6f78ce16aba0df8b6128ea8f3f0 100644
--- a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h
+++ b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h
@@ -140,8 +140,7 @@ setupLinearSystem( const MeshType& mesh,
    const IndexType dofs = this->getDofs( mesh );
    typedef typename MatrixType::CompressedRowLengthsVector CompressedRowLengthsVectorType;
    CompressedRowLengthsVectorType rowLengths;
-   if( ! rowLengths.setSize( dofs ) )
-      return false;
+   rowLengths.setSize( dofs );
    MatrixSetter< MeshType, DifferentialOperator, BoundaryCondition, CompressedRowLengthsVectorType > matrixSetter;
    matrixSetter.template getCompressedRowLengths< typename Mesh::Cell >(
@@ -150,8 +149,7 @@ setupLinearSystem( const MeshType& mesh,
    matrix.setDimensions( dofs, dofs );
-   if( ! matrix.setCompressedRowLengths( rowLengths ) )
-      return false;
+   matrix.setCompressedRowLengths( rowLengths );
    return true;
@@ -262,5 +260,18 @@ assemblyLinearSystem( const RealType& time,
+template< typename Mesh,
+          typename BoundaryCondition,
+          typename RightHandSide,
+          typename DifferentialOperator >
+    template< typename Matrix >
+MeanCurvatureFlowProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >::
+saveFailedLinearSystem( const Matrix& matrix,
+                        const DofVectorType& dofs,
+                        const DofVectorType& rightHandSide ) const
 } // namespace Problems
 } // namespace TNL
diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h
index 431f6588828d2166f42bbc297eb533ee00dd260c..2da2f9981f1bc12673096d278c20a4b3b1b7eedc 100644
--- a/src/TNL/Problems/PDEProblem.h
+++ b/src/TNL/Problems/PDEProblem.h
@@ -20,7 +20,7 @@ namespace Problems {
 template< typename Mesh,
           typename Real = typename Mesh::RealType,
           typename Device = typename Mesh::DeviceType,
-          typename Index = typename Mesh::IndexType >
+          typename Index = typename Mesh::GlobalIndexType >
 class PDEProblem : public Problem< Real, Device, Index >
diff --git a/src/TNL/Problems/cfd/CMakeLists.txt b/src/TNL/Problems/cfd/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Problems/cfd/navier-stokes/CMakeLists.txt b/src/TNL/Problems/cfd/navier-stokes/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h
index 50f2138dd01edf6112ecd6d151d1776aa2910edf..229ff4667b4e6d0c84e1ba27dd5464f40dd94b7f 100644
--- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h
+++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h
@@ -345,10 +345,10 @@ void NavierStokesSolver< AdvectionScheme,
                                                             SolverVectorType& u,
                                                             SolverVectorType& fu )
-   TNL_ASSERT( this->advection, );
-   TNL_ASSERT( this->u1Viscosity, );
-   TNL_ASSERT( this->u2Viscosity, );
-   TNL_ASSERT( this->boundaryConditions, );
+   TNL_ASSERT_TRUE( this->advection, "advection scheme was not set" );
+   TNL_ASSERT_TRUE( this->u1Viscosity, "diffusion scheme was not set" );
+   TNL_ASSERT_TRUE( this->u2Viscosity, "diffusion scheme was not set" );
+   TNL_ASSERT_TRUE( this->boundaryConditions, "boundary conditions were not set" );
    SharedVector< RealType, DeviceType, IndexType > dofs_rho, dofs_rho_u1, dofs_rho_u2, dofs_e,
                                                       rho_t, rho_u1_t, rho_u2_t, e_t;
diff --git a/src/TNL/SharedPointer.h b/src/TNL/SharedPointer.h
index a0308daf19dce085093ef75633f13083d91cb03f..c9e71427518d373f26aec6ff57e386f5809d7365 100644
--- a/src/TNL/SharedPointer.h
+++ b/src/TNL/SharedPointer.h
@@ -14,11 +14,13 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/MIC.h>
 #include <TNL/SmartPointer.h>
 #include <cstring>
@@ -161,11 +163,18 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer
          return this->pd->data;
-      operator bool()
+      __cuda_callable__
+      operator bool() const
          return this->pd;
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pd;
+      }
       template< typename Device = Devices::Host >
       const Object& getData() const
@@ -395,11 +404,18 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
          return this->pd->data;
-      operator bool()
+      __cuda_callable__
+      operator bool() const
          return this->pd;
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pd;
+      }
       template< typename Device = Devices::Host >
       const Object& getData() const
@@ -500,7 +516,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
             TNL_ASSERT( this->cuda_pointer, );
             cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice );
-            if( ! checkCudaDevice ) {
+            if( ! TNL_CHECK_CUDA_DEVICE ) {
                return false;
@@ -544,12 +560,8 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       bool allocate( Args... args )
          this->pd = new PointerData( args... );
-         if( ! this->pd )
-            return false;
          // pass to device
          this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data );
-         if( ! this->cuda_pointer )
-            return false;
          // set last-sync state
@@ -602,4 +614,355 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
       Object* cuda_pointer;
+#ifdef HAVE_MIC
+ * Specialization for MIC
+ */
+template< typename Object>
+class SharedPointer< Object, Devices::MIC > : public SmartPointer
+   private:
+      // Convenient template alias for controlling the selection of copy- and
+      // move-constructors and assignment operators using SFINAE.
+      // The type Object_ is "enabled" iff Object_ and Object are not the same,
+      // but after removing const and volatile qualifiers they are the same.
+      template< typename Object_ >
+      using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value &&
+                                      std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >;
+      // friend class will be needed for templated assignment operators
+      template< typename Object_, typename Device_>
+      friend class SharedPointer;
+   public:
+      typedef Object ObjectType;
+      typedef Devices::MIC DeviceType;
+      typedef SharedPointer< Object, Devices::MIC> ThisType;
+      template< typename... Args >
+      explicit  SharedPointer( Args... args )
+      : pd( nullptr ),
+        mic_pointer( nullptr )
+      {
+            this->allocate( args... );
+      }
+      // this is needed only to avoid the default compiler-generated constructor
+      SharedPointer( const ThisType& pointer )
+      : pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         this->pd->counter += 1;
+      }
+      // conditional constructor for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      SharedPointer( const SharedPointer< Object_, DeviceType >& pointer )
+      : pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         this->pd->counter += 1;
+      }
+      // this is needed only to avoid the default compiler-generated constructor
+      SharedPointer( ThisType&& pointer )
+      : pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         pointer.pd = nullptr;
+         pointer.mic_pointer = nullptr;
+      }
+      // conditional constructor for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      SharedPointer( SharedPointer< Object_, DeviceType >&& pointer )
+      : pd( (PointerData*) pointer.pd ),
+        mic_pointer( pointer.mic_pointer )
+      {
+         pointer.pd = nullptr;
+         pointer.mic_pointer = nullptr;
+      }
+      template< typename... Args >
+      bool recreate( Args... args )
+      {
+         std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl;
+         if( ! this->pd )
+            return this->allocate( args... );
+         if( this->pd->counter == 1 )
+         {
+            /****
+             * The object is not shared -> recreate it in-place, without reallocation
+             */
+            this->pd->data.~Object();
+            new ( &this->pd->data ) Object( args... );
+            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
+            this->set_last_sync_state();
+            return true;
+         }
+         // free will just decrement the counter
+         this->free();
+         return this->allocate( args... );
+      }
+      const Object* operator->() const
+      {
+         return &this->pd->data;
+      }
+      Object* operator->()
+      {
+         this->pd->maybe_modified = true;
+         return &this->pd->data;
+      }
+      const Object& operator *() const
+      {
+         return this->pd->data;
+      }
+      Object& operator *()
+      {
+         this->pd->maybe_modified = true;
+         return this->pd->data;
+      }
+      operator bool()
+      {
+         return this->pd;
+      }
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      const Object& getData() const
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT( this->pd, );
+         TNL_ASSERT( this->mic_pointer, );
+         if( std::is_same< Device, Devices::Host >::value )
+            return this->pd->data;
+         if( std::is_same< Device, Devices::MIC >::value )
+            return *( this->mic_pointer );
+      }
+      template< typename Device = Devices::Host >
+      __cuda_callable__
+      Object& modifyData()
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT( this->pd, );
+         TNL_ASSERT( this->mic_pointer, );
+         if( std::is_same< Device, Devices::Host >::value )
+         {
+            this->pd->maybe_modified = true;
+            return this->pd->data;
+         }
+         if( std::is_same< Device, Devices::MIC >::value )
+            return *( this->mic_pointer );
+      }
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( const ThisType& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         this->pd->counter += 1;
+         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         return *this;
+      }
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( const SharedPointer< Object_, DeviceType >& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         this->pd->counter += 1;
+         std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         return *this;
+      }
+      // this is needed only to avoid the default compiler-generated operator
+      const ThisType& operator=( ThisType&& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         return *this;
+      }
+      // conditional operator for non-const -> const data
+      template< typename Object_,
+                typename = typename Enabler< Object_ >::type >
+      const ThisType& operator=( SharedPointer< Object_, DeviceType >&& ptr )
+      {
+         this->free();
+         this->pd = (PointerData*) ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+         std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+         return *this;
+      }
+      bool synchronize()
+      {
+         if( ! this->pd )
+            return true;
+         if( this->modified() )
+         {
+            std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl;
+            std::cerr << "   ( " << sizeof( Object ) << " bytes, MIC adress " << this->mic_pointer << " )" << std::endl;
+            TNL_ASSERT( this->mic_pointer, );
+            Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));    
+            this->set_last_sync_state();
+            return true;
+         }
+         return false; //??
+      }
+      void clear()
+      {
+         this->free();
+      }
+      ~SharedPointer()
+      {
+         this->free();
+         Devices::MIC::removeSmartPointer( this );
+      }
+   protected:
+      struct PointerData
+      {
+         Object data;
+         uint8_t data_image[ sizeof(Object) ];
+         int counter;
+         bool maybe_modified;
+         template< typename... Args >
+         explicit PointerData( Args... args )
+         : data( args... ),
+           counter( 1 ),
+           maybe_modified( false )
+         {}
+      };
+      template< typename... Args >
+      bool allocate( Args... args )
+      {
+         this->pd = new PointerData( args... );
+         if( ! this->pd )
+            return false;
+         mic_pointer=(Object*)Devices::MIC::AllocMIC(sizeof(Object));
+         Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object));
+         if( ! this->mic_pointer )
+            return false;
+         // set last-sync state
+         this->set_last_sync_state();
+         std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (mic_pointer = " << this->mic_pointer << ")" << std::endl;
+         Devices::MIC::insertSmartPointer( this );
+         return true;
+      }
+      void set_last_sync_state()
+      {
+         TNL_ASSERT( this->pd, );
+         std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) );
+         this->pd->maybe_modified = false;
+      }
+      bool modified()
+      {
+         TNL_ASSERT( this->pd, );
+         // optimization: skip bitwise comparison if we're sure that the data is the same
+         if( ! this->pd->maybe_modified )
+            return false;
+         return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0;
+      }
+      void free()
+      {
+         if( this->pd )
+         {
+            std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", mic_pointer = " << this->mic_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl;
+            if( ! --this->pd->counter )
+            {
+               delete this->pd;
+               this->pd = nullptr;
+               if( this->mic_pointer )
+               {
+                   Devices::MIC::FreeMIC((void*)mic_pointer);
+                   mic_pointer=nullptr;
+               }
+               std::cerr << "...deleted data." << std::endl;
+            }
+         }
+      }
+      PointerData* pd;
+      // cuda_pointer can't be part of PointerData structure, since we would be
+      // unable to dereference this-pd on the device -- Nevím zda to platí pro MIC, asi jo
+      Object* mic_pointer;
+#if  (!defined(NDEBUG)) && (!defined(HAVE_MIC)) 
+namespace Assert {
+template< typename Object, typename Device >
+struct Formatter< SharedPointer< Object, Device > >
+   static std::string
+   printToString( const SharedPointer< Object, Device >& value )
+   {
+      ::std::stringstream ss;
+      ss << "(SharedPointer< " << Object::getType() << ", " << Device::getDeviceType()
+         << " > object at " << &value << ")";
+      return ss.str();
+   }
+} // namespace Assert
 } // namespace TNL
diff --git a/src/TNL/SmartPointersRegister.cpp b/src/TNL/SmartPointersRegister.cpp
index 7fe8a654f4a6ac6c435b4d8ccb02aadc84df37fb..03d3e058f004c201c61d7c307cce7327d5e106c9 100644
--- a/src/TNL/SmartPointersRegister.cpp
+++ b/src/TNL/SmartPointersRegister.cpp
@@ -44,7 +44,7 @@ bool SmartPointersRegister::synchronizeDevice( int deviceId )
       const auto & set = pointersOnDevices.at( deviceId );
       for( auto&& it : set )
          ( *it ).synchronize();
-      return checkCudaDevice;
+      return TNL_CHECK_CUDA_DEVICE;
    catch( std::out_of_range ) {
       return false;
diff --git a/src/TNL/Solvers/BuildConfigTags.h b/src/TNL/Solvers/BuildConfigTags.h
index 003974c3b39c3fae2ce7645803e70a0b5312ea05..e89aa9d925c7bfe748ec8ab1f98bbc8408787d18 100644
--- a/src/TNL/Solvers/BuildConfigTags.h
+++ b/src/TNL/Solvers/BuildConfigTags.h
@@ -16,6 +16,7 @@
 #include <TNL/Solvers/Linear/SOR.h>
 #include <TNL/Solvers/Linear/CG.h>
 #include <TNL/Solvers/Linear/BICGStab.h>
+#include <TNL/Solvers/Linear/BICGStabL.h>
 #include <TNL/Solvers/Linear/CWYGMRES.h>
 #include <TNL/Solvers/Linear/GMRES.h>
 #include <TNL/Solvers/Linear/TFQMR.h>
@@ -36,6 +37,10 @@ template< typename ConfigTag, typename Device > struct ConfigTagDevice{ enum { e
 template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::Cuda >{ enum { enabled = false }; };
+#ifndef HAVE_MIC
+template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::MIC >{ enum { enabled = false }; };
  * All real types are enabled by default.
@@ -133,6 +138,16 @@ public:
     using Template = Linear::BICGStab< Matrix, Preconditioner >;
+class  SemiImplicitBICGStabLSolverTag
+    template< typename Matrix,
+              typename Preconditioner = Linear::Preconditioners::Dummy< typename Matrix::RealType,
+                                                                        typename Matrix::DeviceType,
+                                                                        typename Matrix::IndexType > >
+    using Template = Linear::BICGStabL< Matrix, Preconditioner >;
 class  SemiImplicitCWYGMRESSolverTag
diff --git a/src/TNL/Solvers/CMakeLists.txt b/src/TNL/Solvers/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Solvers/Linear/BICGStab.h b/src/TNL/Solvers/Linear/BICGStab.h
index c7ba294d02100c386250fadbc4050336bcd46ebc..02d17965e890123968bf6059082cba0164e16f71 100644
--- a/src/TNL/Solvers/Linear/BICGStab.h
+++ b/src/TNL/Solvers/Linear/BICGStab.h
@@ -14,7 +14,6 @@
 #include <TNL/Object.h>
 #include <TNL/SharedPointer.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/SharedVector.h>
 #include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
 #include <TNL/Solvers/IterativeSolver.h>
 #include <TNL/Solvers/Linear/LinearResidueGetter.h>
@@ -62,7 +61,7 @@ class BICGStab : public Object,
-   bool setSize( IndexType size );
+   void setSize( IndexType size );
    bool exact_residue;
diff --git a/src/TNL/Solvers/Linear/BICGStabL.h b/src/TNL/Solvers/Linear/BICGStabL.h
new file mode 100644
index 0000000000000000000000000000000000000000..124f70839950e5565a5fb9c0a6931c08abce4509
--- /dev/null
+++ b/src/TNL/Solvers/Linear/BICGStabL.h
@@ -0,0 +1,119 @@
+                          BICGStabL.h  -  description
+                             -------------------
+    begin                : Jul 4, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+ * BICGStabL implements an iterative solver for non-symmetric linear systems,
+ * using the BiCGstab(l) algorithm described in [1] and [2]. It is a
+ * generalization of the stabilized biconjugate-gradient (BiCGstab) algorithm
+ * proposed by van der Vorst [3]. BiCGstab(1) is equivalent to BiCGstab, and
+ * BiCGstab(2) is a slightly more efficient version of the BiCGstab2 algorithm
+ * by Gutknecht [4], while BiCGstab(l>2) is a further generalization.
+ *
+ * This code was implemented by: Jakub Klinkovsky <klinkjak@fjfi.cvut.cz>
+ *
+ * [1] Gerard L. G. Sleijpen and Diederik R. Fokkema, "BiCGstab(l) for linear
+ *     equations involving unsymmetric matrices with complex spectrum",
+ *     Electronic Trans. on Numerical Analysis 1, 11-32 (1993).
+ * [2] Gerard L. G. Sleijpen, Henk A. van der Vorst, and Diederik R. Fokkema,
+ *     "BiCGstab(l) and other Hybrid Bi-CG Methods", Numerical Algorithms 7,
+ *     75-109 (1994).
+ * [3] Henk A. van der Vorst, "Bi-CGSTAB: A fast and smoothly converging variant
+ *     of Bi-CG for the solution of nonsymmetric linear systems, SIAM Journal on
+ *     scientific and Statistical Computing 13.2, 631-644 (1992).
+ * [4] Martin H. Gutknecht, "Variants of BiCGStab for matrices with complex
+ *     spectrum", IPS Research Report No. 91-14 (1991).
+ *
+ * TODO: further variations to explore:
+ *
+ * [5] Gerard L. G. Sleijpen and Henk A. van der Vorst, "Reliable updated
+ *     residuals in hybrid Bi-CG methods", Computing 56 (2), 141-163 (1996).
+ * [6] Gerard L. G. Sleijpen and Henk A. van der Vorst, "Maintaining convergence
+ *     properties of BiCGstab methods in finite precision arithmetic", Numerical
+ *     Algorithms 10, 203-223 (1995).
+ */
+#pragma once
+#include <math.h>
+#include <TNL/Object.h>
+#include <TNL/SharedPointer.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
+#include <TNL/Solvers/IterativeSolver.h>
+#include <TNL/Solvers/Linear/LinearResidueGetter.h>
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+template< typename Matrix,
+          typename Preconditioner = Preconditioners::Dummy< typename Matrix :: RealType,
+                                                            typename Matrix :: DeviceType,
+                                                            typename Matrix :: IndexType> >
+class BICGStabL
+   : public Object,
+     public IterativeSolver< typename Matrix :: RealType,
+                             typename Matrix :: IndexType >
+   typedef typename Matrix::RealType RealType;
+   typedef typename Matrix::IndexType IndexType;
+   typedef typename Matrix::DeviceType DeviceType;
+   typedef Matrix MatrixType;
+   typedef Preconditioner PreconditionerType;
+   typedef SharedPointer< const MatrixType, DeviceType > MatrixPointer;
+   typedef SharedPointer< const PreconditionerType, DeviceType > PreconditionerPointer;
+   typedef Containers::Vector< RealType, DeviceType, IndexType > DeviceVector;
+   typedef Containers::Vector< RealType, Devices::Host, IndexType > HostVector;
+   BICGStabL();
+   String getType() const;
+   static void configSetup( Config::ConfigDescription& config,
+                            const String& prefix = "" );
+   bool setup( const Config::ParameterContainer& parameters,
+               const String& prefix = "" );
+   void setMatrix( const MatrixPointer& matrix );
+   void setPreconditioner( const PreconditionerPointer& preconditioner );
+   template< typename Vector,
+             typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
+   bool solve( const Vector& b, Vector& x );
+   void setSize( IndexType size );
+   int ell = 1;
+   bool exact_residue = false;
+   // matrices (in column-major format)
+   DeviceVector R, U;
+   // single vectors
+   DeviceVector r_ast, M_tmp, res_tmp;
+   // host-only storage
+   HostVector T, sigma, g_0, g_1, g_2;
+   IndexType size, ldSize;
+   MatrixPointer matrix;
+   PreconditionerPointer preconditioner;
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
+#include <TNL/Solvers/Linear/BICGStabL_impl.h>
diff --git a/src/TNL/Solvers/Linear/BICGStabL_impl.h b/src/TNL/Solvers/Linear/BICGStabL_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..f448e7c8835c49dd72a31e4b8e689dd09d825f85
--- /dev/null
+++ b/src/TNL/Solvers/Linear/BICGStabL_impl.h
@@ -0,0 +1,323 @@
+                          BICGStabL.h  -  description
+                             -------------------
+    begin                : Jul 4, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include "BICGStabL.h"
+#include <TNL/Matrices/MatrixOperations.h>
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::BICGStabL()
+   /****
+    * Clearing the shared pointer means that there is no
+    * preconditioner set.
+    */
+   this->preconditioner.clear();
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::getType() const
+   return String( "BICGStabL< " ) +
+          this->matrix -> getType() + ", " +
+          this->preconditioner -> getType() + " >";
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::
+configSetup( Config::ConfigDescription& config,
+             const String& prefix )
+   //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
+   config.addEntry< int >( prefix + "bicgstab-ell", "Number of Bi-CG iterations before the MR part starts.", 1 );
+   config.addEntry< bool >( prefix + "bicgstab-exact-residue", "Whether the BiCGstab should compute the exact residue in each step (true) or to use a cheap approximation (false).", false );
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::
+setup( const Config::ParameterContainer& parameters,
+       const String& prefix )
+   ell = parameters.getParameter< int >( "bicgstab-ell" );
+   exact_residue = parameters.getParameter< bool >( "bicgstab-exact-residue" );
+   return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::setMatrix( const MatrixPointer& matrix )
+   this->matrix = matrix;
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::setPreconditioner( const PreconditionerPointer& preconditioner )
+   this->preconditioner = preconditioner;
+template< typename Matrix,
+          typename Preconditioner >
+   template< typename Vector, typename ResidueGetter >
+BICGStabL< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
+   this->setSize( matrix->getRows() );
+   RealType alpha, beta, gamma, rho_0, rho_1, omega, b_norm;
+   DeviceVector r_0, r_j, r_i, u_0, Au, u;
+   r_0.bind( R.getData(), size );
+   u_0.bind( U.getData(), size );
+   auto matvec = [this]( const DeviceVector& src, DeviceVector& dst )
+   {
+      if( preconditioner ) {
+         matrix->vectorProduct( src, M_tmp );
+         preconditioner->solve( M_tmp, dst );
+      }
+      else {
+         matrix->vectorProduct( src, dst );
+      }
+   };
+   if( preconditioner ) {
+      preconditioner->solve( b, M_tmp );
+      b_norm = M_tmp.lpNorm( ( RealType ) 2.0 );
+      matrix->vectorProduct( x, M_tmp );
+      M_tmp.addVector( b, 1.0, -1.0 );
+      preconditioner->solve( M_tmp, r_0 );
+   }
+   else {
+      b_norm = b.lpNorm( 2.0 );
+      matrix->vectorProduct( x, r_0 );
+      r_0.addVector( b, 1.0, -1.0 );
+   }
+   sigma[ 0 ] = r_0.lpNorm( 2.0 );
+   if( std::isnan( sigma[ 0 ] ) )
+      throw std::runtime_error( "BiCGstab(ell): initial residue is NAN" );
+   r_ast = r_0;
+   r_ast /= sigma[ 0 ];
+   rho_0 = 1.0;
+   alpha = 0.0;
+   omega = 1.0;
+   u_0.setValue( 0.0 );
+   if( b_norm == 0.0 )
+       b_norm = 1.0;
+   this->resetIterations();
+   this->setResidue( sigma[ 0 ] / b_norm );
+   while( this->checkNextIteration() )
+   {
+      rho_0 = - omega * rho_0;
+      /****
+       * Bi-CG part
+       */
+      for( int j = 0; j < ell; j++ ) {
+         this->nextIteration();
+         r_j.bind( &R.getData()[ j * ldSize ], size );
+         rho_1 = r_ast.scalarProduct( r_j );
+         beta = alpha * rho_1 / rho_0;
+         rho_0 = rho_1;
+         /****
+          * U_[0:j] := R_[0:j] - beta * U_[0:j]
+          */
+         MatrixOperations< DeviceType >::
+            geam( size, j + 1,
+                  1.0, R.getData(), ldSize,
+                  -beta, U.getData(), ldSize,
+                  U.getData(), ldSize );
+         /****
+          * u_{j+1} = A u_j
+          */
+         u.bind( &U.getData()[ j * ldSize ], size );
+         Au.bind( &U.getData()[ (j + 1) * ldSize ], size );
+         matvec( u, Au );
+         gamma = r_ast.scalarProduct( Au );
+         alpha = rho_0 / gamma;
+         /****
+          * R_[0:j] := R_[0:j] - alpha * U_[1:j+1]
+          */
+         MatrixOperations< DeviceType >::
+            geam( size, j + 1,
+                  1.0, R.getData(), ldSize,
+                  -alpha, U.getData() + ldSize, ldSize,
+                  R.getData(), ldSize );
+         /****
+          * r_{j+1} = A r_j
+          */
+         r_j.bind( &R.getData()[ j * ldSize ], size );
+         r_i.bind( &R.getData()[ (j + 1) * ldSize ], size );
+         matvec( r_j, r_i );
+         /****
+          * x_0 := x_0 + alpha * u_0
+          */
+         x.addVector( u_0, alpha );
+      }
+      /****
+       * MGS part
+       */
+      for( int j = 1; j <= ell; j++ ) {
+         r_j.bind( &R.getData()[ j * ldSize ], size );
+         // MGS without reorthogonalization
+         for( int i = 1; i < j; i++ ) {
+            r_i.bind( &R.getData()[ i * ldSize ], size );
+            /****
+             * T_{i,j} = (r_i, r_j) / sigma_i
+             * r_j := r_j - T_{i,j} * r_i
+             */
+            const int ij = (i-1) + (j-1) * ell;
+            T[ ij ] = r_i.scalarProduct( r_j ) / sigma[ i ];
+            r_j.addVector( r_i, -T[ ij ] );
+         }
+         // MGS with reorthogonalization
+//         for( int i = 1; i < j; i++ ) {
+//            const int ij = (i-1) + (j-1) * ell;
+//            T[ ij ] = 0.0;
+//         }
+//         for( int l = 0; l < 2; l++ )
+//            for( int i = 1; i < j; i++ ) {
+//               r_i.bind( &R.getData()[ i * ldSize ], size );
+//               /****
+//                * T_{i,j} = (r_i, r_j) / sigma_i
+//                * r_j := r_j - T_{i,j} * r_i
+//                */
+//               const int ij = (i-1) + (j-1) * ell;
+//               const RealType T_ij = r_i.scalarProduct( r_j ) / sigma[ i ];
+//               T[ ij ] += T_ij;
+//               r_j.addVector( r_i, -T_ij );
+//            }
+         sigma[ j ] = r_j.scalarProduct( r_j );
+         g_1[ j ] = r_0.scalarProduct( r_j ) / sigma[ j ];
+      }
+      omega = g_1[ ell ];
+      /****
+       * g_0 = T^{-1} g_1
+       */
+      for( int j = ell; j >= 1; j-- ) {
+         g_0[ j ] = g_1[ j ];
+         for( int i = j + 1; i <= ell; i++ )
+            g_0[ j ] -= T[ (j-1) + (i-1) * ell ] * g_0[ i ];
+      }
+      /****
+       * g_2 = T * S * g_0,
+       * where S e_1 = 0, S e_j = e_{j-1} for j = 2, ... ell
+       */
+      for( int j = 1; j < ell; j++ ) {
+         g_2[ j ] = g_0[ j + 1 ];
+         for( int i = j + 1; i < ell; i++ )
+            g_2[ j ] += T[ (j-1) + (i-1) * ell ] * g_0[ i + 1 ];
+      }
+      /****
+       * Final updates
+       */
+      // x := x + R_[0:ell-1] * g_2
+      g_2[ 0 ] = g_0[ 1 ];
+      MatrixOperations< DeviceType >::gemv( size, ell,
+                                            1.0, R.getData(), ldSize, g_2.getData(),
+                                            1.0, x.getData() );
+      // r_0 := r_0 - R_[1:ell] * g_1_[1:ell]
+      MatrixOperations< DeviceType >::gemv( size, ell,
+                                            -1.0, R.getData() + ldSize, ldSize, &g_1[ 1 ],
+                                            1.0, r_0.getData() );
+      // u_0 := u_0 - U_[1:ell] * g_0_[1:ell]
+      MatrixOperations< DeviceType >::gemv( size, ell,
+                                            -1.0, U.getData() + ldSize, ldSize, &g_0[ 1 ],
+                                            1.0, u_0.getData() );
+      if( exact_residue ) {
+         /****
+          * Compute the exact preconditioned residue into the 's' vector.
+          */
+         if( preconditioner ) {
+            matrix->vectorProduct( x, M_tmp );
+            M_tmp.addVector( b, 1.0, -1.0 );
+            preconditioner->solve( M_tmp, res_tmp );
+         }
+         else {
+            matrix->vectorProduct( x, res_tmp );
+            res_tmp.addVector( b, 1.0, -1.0 );
+         }
+         sigma[ 0 ] = res_tmp.lpNorm( 2.0 );
+         this->setResidue( sigma[ 0 ] / b_norm );
+      }
+      else {
+         /****
+          * Use the "orthogonal residue vector" for stopping.
+          */
+         sigma[ 0 ] = r_0.lpNorm( 2.0 );
+         this->setResidue( sigma[ 0 ] / b_norm );
+      }
+   }
+   this->refreshSolverMonitor( true );
+   return this->checkConvergence();
+template< typename Matrix,
+          typename Preconditioner >
+BICGStabL< Matrix, Preconditioner >::setSize( IndexType size )
+   this->size = ldSize = size;
+   R.setSize( (ell + 1) * ldSize );
+   U.setSize( (ell + 1) * ldSize );
+   r_ast.setSize( size );
+   M_tmp.setSize( size );
+   if( exact_residue )
+      res_tmp.setSize( size );
+   T.setSize( ell * ell );
+   sigma.setSize( ell + 1 );
+   g_0.setSize( ell + 1 );
+   g_1.setSize( ell + 1 );
+   g_2.setSize( ell + 1 );
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
diff --git a/src/TNL/Solvers/Linear/BICGStab_impl.h b/src/TNL/Solvers/Linear/BICGStab_impl.h
index a86af13ca545d6da4cf88cd7f967177028da9f21..adf744b50da033cd102b778189eacd68a22f9a38 100644
--- a/src/TNL/Solvers/Linear/BICGStab_impl.h
+++ b/src/TNL/Solvers/Linear/BICGStab_impl.h
@@ -55,7 +55,7 @@ BICGStab< Matrix, Preconditioner >::
 setup( const Config::ParameterContainer& parameters,
        const String& prefix )
-   exact_residue = parameters.getParameter< int >( "bicgstab-exact-residue" );
+   exact_residue = parameters.getParameter< bool >( "bicgstab-exact-residue" );
    return IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
@@ -78,8 +78,7 @@ template< typename Matrix,
    template< typename Vector, typename ResidueGetter >
 bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
-   if( ! this->setSize( matrix->getRows() ) )
-      return false;
+   this->setSize( matrix->getRows() );
    RealType alpha, beta, omega, aux, rho, rho_old, b_norm;
@@ -193,21 +192,15 @@ bool BICGStab< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
 template< typename Matrix,
           typename Preconditioner >
-bool BICGStab< Matrix, Preconditioner > :: setSize( IndexType size )
+void BICGStab< Matrix, Preconditioner > :: setSize( IndexType size )
-   if( ! r.setSize( size ) ||
-       ! r_ast.setSize( size ) ||
-       ! p.setSize( size ) ||
-       ! s.setSize( size ) ||
-       ! Ap.setSize( size ) ||
-       ! As.setSize( size ) ||
-       ! M_tmp.setSize( size ) )
-   {
-      std::cerr << "I am not able to allocate all supporting arrays for the BICGStab solver." << std::endl;
-      return false;
-   }
-   return true;
+   r.setSize( size );
+   r_ast.setSize( size );
+   p.setSize( size );
+   s.setSize( size );
+   Ap.setSize( size );
+   As.setSize( size );
+   M_tmp.setSize( size );
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/CG.h b/src/TNL/Solvers/Linear/CG.h
index 2a7b2403da80dc9f0ad77d2ef3936a1fed8fb4a5..670303873d401e489ae55740b35905ab1914371b 100644
--- a/src/TNL/Solvers/Linear/CG.h
+++ b/src/TNL/Solvers/Linear/CG.h
@@ -14,7 +14,6 @@
 #include <TNL/Object.h>
 #include <TNL/SharedPointer.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/SharedVector.h>
 #include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
 #include <TNL/Solvers/IterativeSolver.h>
 #include <TNL/Solvers/Linear/LinearResidueGetter.h>
@@ -60,11 +59,9 @@ class CG : public Object,
              typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
    bool solve( const Vector& b, Vector& x );
-   ~CG();
-   bool setSize( IndexType size );
+   void setSize( IndexType size );
    Containers::Vector< RealType, DeviceType, IndexType >  r, new_r, p, Ap;
diff --git a/src/TNL/Solvers/Linear/CG_impl.h b/src/TNL/Solvers/Linear/CG_impl.h
index e8b0f40056b37ee56e61329c075889b2372d0843..4c11c224e5c0b4f73c45a82146862473b9fc6488 100644
--- a/src/TNL/Solvers/Linear/CG_impl.h
+++ b/src/TNL/Solvers/Linear/CG_impl.h
@@ -77,7 +77,7 @@ bool
 CG< Matrix, Preconditioner >::
 solve( const Vector& b, Vector& x )
-   if( ! this->setSize( matrix->getRows() ) ) return false;
+   this->setSize( matrix->getRows() );
    this->setResidue( this->getConvergenceResidue() + 1.0 );
@@ -147,28 +147,17 @@ solve( const Vector& b, Vector& x )
    this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
    this->refreshSolverMonitor( true );
    return this->checkConvergence();
-template< typename Matrix,
-          typename Preconditioner >
-CG< Matrix, Preconditioner > :: ~CG()
 template< typename Matrix,
           typename Preconditioner >
-bool CG< Matrix, Preconditioner > :: setSize( IndexType size )
+void CG< Matrix, Preconditioner > :: setSize( IndexType size )
-   if( ! r. setSize( size ) ||
-       ! new_r. setSize( size ) ||
-       ! p. setSize( size ) ||
-       ! Ap. setSize( size ) )
-   {
-      std::cerr << "I am not able to allocated all supporting arrays for the CG solver." << std::endl;
-      return false;
-   }
-   return true;
+   r.setSize( size );
+   new_r.setSize( size );
+   p.setSize( size );
+   Ap.setSize( size );
 } // namespace Linear
 } // namespace Solvers
diff --git a/src/TNL/Solvers/Linear/CMakeLists.txt b/src/TNL/Solvers/Linear/CMakeLists.txt
old mode 100755
new mode 100644
index 01e22b367b23872fcddb76ba6d6c7a42d1af36a6..2321264a86c29551aeefffddcf930c9b3eac8077
--- a/src/TNL/Solvers/Linear/CMakeLists.txt
+++ b/src/TNL/Solvers/Linear/CMakeLists.txt
@@ -2,6 +2,8 @@ ADD_SUBDIRECTORY( Preconditioners )
 SET( headers BICGStab.h
+             BICGStabL.h
+             BICGStabL_impl.h
diff --git a/src/TNL/Solvers/Linear/CWYGMRES.h b/src/TNL/Solvers/Linear/CWYGMRES.h
index 13f8e514e537f27e9070020f23d530cf5f350a82..fd7b4dbef3af1fe71202d6942403ba244a2d43d3 100644
--- a/src/TNL/Solvers/Linear/CWYGMRES.h
+++ b/src/TNL/Solvers/Linear/CWYGMRES.h
@@ -108,7 +108,7 @@ protected:
                             RealType& sn );
-   bool setSize( IndexType _size, IndexType m );
+   void setSize( IndexType _size, IndexType m );
    // single vectors
    DeviceVector r, z, w, _M_tmp;
@@ -119,7 +119,7 @@ protected:
    // host-only storage for Givens rotations and the least squares problem
    HostVector cs, sn, H, s;
-   IndexType size, ldSize, restarting;
+   IndexType size, ldSize, restarting_min, restarting_max, restarting_step_min, restarting_step_max;
    MatrixPointer matrix;
    PreconditionerPointer preconditioner;
diff --git a/src/TNL/Solvers/Linear/CWYGMRES_impl.h b/src/TNL/Solvers/Linear/CWYGMRES_impl.h
index f15f4ab83532737e8fb712b9ce3dce626feb6d41..01f966faa5b1df7298bdd603db893b79210ec310 100644
--- a/src/TNL/Solvers/Linear/CWYGMRES_impl.h
+++ b/src/TNL/Solvers/Linear/CWYGMRES_impl.h
@@ -14,6 +14,7 @@
 #include <type_traits>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Containers/Algorithms/Multireduction.h>
 #include <TNL/Matrices/MatrixOperations.h>
@@ -29,7 +30,10 @@ CWYGMRES< Matrix, Preconditioner >::
 : size( 0 ),
   ldSize( 0 ),
-  restarting( 10 )
+  restarting_min( 10 ),
+  restarting_max( 10 ),
+  restarting_step_min( 3 ),
+  restarting_step_max( 3 )
     * Clearing the shared pointer means that there is no
@@ -64,7 +68,10 @@ configSetup( Config::ConfigDescription& config,
              const String& prefix )
    //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
-   config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the CWYGMRES restarts.", 10 );
+   config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 );
+   config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 );
+   config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
+   config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
 template< typename Matrix,
@@ -75,7 +82,10 @@ setup( const Config::ParameterContainer& parameters,
        const String& prefix )
    IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
-   this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) );
+   restarting_min = parameters.getParameter< int >( "gmres-restarting-min" );
+   this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) );
+   restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" );
+   restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" );
    return true;
@@ -87,7 +97,7 @@ setRestarting( IndexType rest )
    if( size != 0 )
       setSize( size, rest );
-   restarting = rest;
+   restarting_max = rest;
 template< typename Matrix,
@@ -115,18 +125,20 @@ bool
 CWYGMRES< Matrix, Preconditioner >::
 solve( const Vector& b, Vector& x )
-   TNL_ASSERT( matrix, std::cerr << "No matrix was set in CWYGMRES. Call setMatrix() before solve()." << std::endl );
-   if( restarting <= 0 )
+   TNL_ASSERT_TRUE( matrix, "No matrix was set in CWYGMRES. Call setMatrix() before solve()." );
+   if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max )
-      std::cerr << "I have wrong value for the restarting of the CWYGMRES solver. It is set to " << restarting
-           << ". Please set some positive value using the SetRestarting method." << std::endl;
+      std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min
+                << ", r_max = " << restarting_max << std::endl;
       return false;
-   if( ! setSize( matrix -> getRows(), restarting ) )
+   if( restarting_step_min < 0 || restarting_step_max < 0 || restarting_step_min > restarting_step_max )
-      std::cerr << "I am not able to allocate enough memory for the CWYGMRES solver. You may try to decrease the restarting parameter." << std::endl;
-       return false;
+      std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min
+                << ", d_max = " << restarting_step_max << std::endl;
+      return false;
+   setSize( matrix -> getRows(), restarting_max );
    RealType normb( 0.0 ), beta( 0.0 );
@@ -160,10 +172,37 @@ solve( const Vector& b, Vector& x )
    this->setResidue( beta / normb );
+   // parameters for the adaptivity of the restarting parameter
+         RealType beta_ratio = 1;           // = beta / beta_ratio (small value indicates good convergence rate)
+   const RealType max_beta_ratio = 0.99;    // = cos(8°) \approx 0.99
+   const RealType min_beta_ratio = 0.175;   // = cos(80°) \approx 0.175
+         int restart_cycles = 0;    // counter of restart cycles
+         int m = restarting_max;    // current restarting parameter
    DeviceVector vi, vk;
    while( this->checkNextIteration() )
-      const IndexType m = restarting;
+      // adaptivity of the restarting parameter
+      // reference:  A.H. Baker, E.R. Jessup, Tz.V. Kolev - A simple strategy for varying the restart parameter in GMRES(m)
+      //             http://www.sciencedirect.com/science/article/pii/S0377042709000132
+      if( restart_cycles > 0 ) {
+         if( beta_ratio > max_beta_ratio )
+            // near stagnation -> set maximum
+            m = restarting_max;
+         else if( beta_ratio >= min_beta_ratio ) {
+            // the step size is determined based on current m using linear interpolation
+            // between restarting_step_min and restarting_step_max
+            const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) /
+                                                           ( restarting_max - restarting_min ) *
+                                                           ( m - restarting_min );
+            if( m - step >= restarting_min )
+               m -= step;
+            else
+               // set restarting_max when we hit restarting_min (see Baker et al. (2009))
+               m = restarting_max;
+         }
+//         std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << "    " << std::endl;
+      }
        * z = r / | r | =  1.0 / beta * r
@@ -295,6 +334,7 @@ solve( const Vector& b, Vector& x )
        * r = M.solve(b - A * x);
+      const RealType beta_old = beta;
       if( preconditioner )
          matrix->vectorProduct( x, _M_tmp );
@@ -313,6 +353,9 @@ solve( const Vector& b, Vector& x )
 //      cout << " beta = " << beta << endl;
 //      cout << "residue = " << beta / normb << endl;
+      // update parameters for the adaptivity of the restarting parameter
+      ++restart_cycles;
+      beta_ratio = beta / beta_old;
    this->refreshSolverMonitor( true );
    return this->checkConvergence();
@@ -375,9 +418,9 @@ hauseholder_generate( DeviceVector& Y,
                                                             size );
-      checkCudaDevice;
-      CudaSupportMissingMessage;
+      throw Exceptions::CudaSupportMissing();
@@ -401,7 +444,7 @@ hauseholder_generate( DeviceVector& Y,
    // assuming it's stable enough...
    const RealType t_i = 2.0 / (norm_yi * norm_yi);
-   T[ i + i * (restarting + 1) ] = t_i;
+   T[ i + i * (restarting_max + 1) ] = t_i;
    if( i > 0 ) {
       // aux = Y_{i-1}^T * y_i
       RealType aux[ i ];
@@ -421,9 +464,9 @@ hauseholder_generate( DeviceVector& Y,
       // [T_i]_{0..i-1} = - T_{i-1} * t_i * aux
       for( int k = 0; k < i; k++ ) {
-         T[ k + i * (restarting + 1) ] = 0.0;
+         T[ k + i * (restarting_max + 1) ] = 0.0;
          for( int j = k; j < i; j++ )
-            T[ k + i * (restarting + 1) ] -= T[ k + j * (restarting + 1) ] * (t_i * aux[ j ]);
+            T[ k + i * (restarting_max + 1) ] -= T[ k + j * (restarting_max + 1) ] * (t_i * aux[ j ]);
@@ -441,7 +484,7 @@ hauseholder_apply_trunc( HostVector& out,
    DeviceVector y_i;
    y_i.bind( &Y.getData()[ i * ldSize ], size );
-   const RealType aux = T[ i + i * (restarting + 1) ] * y_i.scalarProduct( z );
+   const RealType aux = T[ i + i * (restarting_max + 1) ] * y_i.scalarProduct( z );
    if( std::is_same< DeviceType, Devices::Host >::value ) {
       for( int k = 0; k <= i; k++ )
          out[ k ] = z[ k ] - y_i[ k ] * aux;
@@ -449,9 +492,9 @@ hauseholder_apply_trunc( HostVector& out,
    if( std::is_same< DeviceType, Devices::Cuda >::value ) {
       // copy part of y_i to buffer on host
       // here we duplicate the upper (m+1)x(m+1) submatrix of Y on host for fast access
-      RealType* host_yi = &YL[ i * (restarting + 1) ];
+      RealType* host_yi = &YL[ i * (restarting_max + 1) ];
       RealType host_z[ i + 1 ];
-      if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting + 1 ) ||
+      if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting_max + 1 ) ||
           ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_z, z.getData(), i + 1 ) )
          std::cerr << "Failed to copy part of device vectors y_i or z to host buffer." << std::endl;
@@ -480,7 +523,7 @@ hauseholder_cwy( DeviceVector& v,
    if( std::is_same< DeviceType, Devices::Cuda >::value ) {
       // the upper (m+1)x(m+1) submatrix of Y is duplicated on host for fast access
       for( int k = 0; k <= i; k++ )
-         aux[ k ] = YL[ i + k * (restarting + 1) ];
+         aux[ k ] = YL[ i + k * (restarting_max + 1) ];
    // aux = T_i * aux
@@ -488,7 +531,7 @@ hauseholder_cwy( DeviceVector& v,
    for( int k = 0; k <= i; k++ ) {
       RealType aux2 = 0.0;
       for( int j = k; j <= i; j++ )
-         aux2 += T[ k + j * (restarting + 1) ] * aux[ j ];
+         aux2 += T[ k + j * (restarting_max + 1) ] * aux[ j ];
       aux[ k ] = aux2;
@@ -530,7 +573,7 @@ hauseholder_cwy_transposed( DeviceVector& z,
    for( int k = i; k >= 0; k-- ) {
       RealType aux2 = 0.0;
       for( int j = 0; j <= k; j++ )
-         aux2 += T[ j + k * (restarting + 1) ] * aux[ j ];
+         aux2 += T[ j + k * (restarting_max + 1) ] * aux[ j ];
       aux[ k ] = aux2;
@@ -629,9 +672,10 @@ applyPlaneRotation( RealType& dx,
 template< typename Matrix,
           typename Preconditioner >
-bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m )
+void CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m )
-   if( size == _size && restarting == m ) return true;
+   if( size == _size && restarting_max == m )
+      return;
    size = _size;
    if( std::is_same< DeviceType, Devices::Cuda >::value )
       // align each column to 256 bytes - optimal for CUDA
@@ -639,24 +683,19 @@ bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m
        // on the host, we add 1 to disrupt the cache false-sharing pattern
       ldSize = roundToMultiple( size, 256 / sizeof( RealType ) ) + 1;
-   restarting = m;
-   if( ! r.setSize( size ) ||
-       ! z.setSize( size ) ||
-       ! w.setSize( size ) ||
-       ! V.setSize( ldSize * ( restarting + 1 ) ) ||
-       ! Y.setSize( ldSize * ( restarting + 1 ) ) ||
-       ! T.setSize( (restarting + 1) * (restarting + 1) ) ||
-       ! YL.setSize( (restarting + 1) * (restarting + 1) ) ||
-       ! cs.setSize( restarting + 1 ) ||
-       ! sn.setSize( restarting + 1 ) ||
-       ! H.setSize( ( restarting + 1 ) * restarting ) ||
-       ! s.setSize( restarting + 1 ) ||
-       ! _M_tmp.setSize( size ) )
-   {
-      std::cerr << "I could not allocate all supporting arrays for the CWYGMRES solver." << std::endl;
-      return false;
-   }
-   return true;
+   restarting_max = m;
+   r.setSize( size );
+   z.setSize( size );
+   w.setSize( size );
+   V.setSize( ldSize * ( m + 1 ) );
+   Y.setSize( ldSize * ( m + 1 ) );
+   T.setSize( (m + 1) * (m + 1) );
+   YL.setSize( (m + 1) * (m + 1) );
+   cs.setSize( m + 1 );
+   sn.setSize( m + 1 );
+   H.setSize( ( m + 1 ) * m );
+   s.setSize( m + 1 );
+   _M_tmp.setSize( size );
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h
index 95fdbd9e3f9ceb80792ff7e6c90d537c13257a09..845f9399503c0981f0f608e9b685ce8be508b594 100644
--- a/src/TNL/Solvers/Linear/GMRES.h
+++ b/src/TNL/Solvers/Linear/GMRES.h
@@ -82,12 +82,12 @@ protected:
                             RealType& sn );
-   bool setSize( IndexType _size, IndexType m );
+   void setSize( IndexType _size, IndexType m );
    Containers::Vector< RealType, DeviceType, IndexType > _r, w, _v, _M_tmp;
    Containers::Vector< RealType, Devices::Host, IndexType > _s, _cs, _sn, _H;
-   IndexType size, restarting;
+   IndexType size, restarting_min, restarting_max, restarting_step_min, restarting_step_max;
    MatrixPointer matrix;
diff --git a/src/TNL/Solvers/Linear/GMRES_impl.cpp b/src/TNL/Solvers/Linear/GMRES_impl.cpp
index 3bf16b3ee99df843aaa1f402497c377c7ca81f02..de639a5c219d0743951e0ebac8abb2a28b726864 100644
--- a/src/TNL/Solvers/Linear/GMRES_impl.cpp
+++ b/src/TNL/Solvers/Linear/GMRES_impl.cpp
@@ -8,6 +8,8 @@
 /* See Copyright Notice in tnl/Copyright */
 #include <TNL/Solvers/Linear/GMRES.h>
 #include <TNL/Matrices/CSR.h>
 #include <TNL/Matrices/Ellpack.h>
@@ -53,3 +55,5 @@ template class GMRES< Matrices::Multidiagonal< double, Devices::Cuda, long int >
 } // namespace Linear
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h
index 15c887c16d1aa9665fe0102e3317fbbd6b7d67a1..d3a20175926d9023228152f91a24ac04154fc80c 100644
--- a/src/TNL/Solvers/Linear/GMRES_impl.h
+++ b/src/TNL/Solvers/Linear/GMRES_impl.h
@@ -21,7 +21,10 @@ template< typename Matrix,
 GMRES< Matrix, Preconditioner >::
 : size( 0 ),
-  restarting( 10 )
+  restarting_min( 10 ),
+  restarting_max( 10 ),
+  restarting_step_min( 3 ),
+  restarting_step_max( 3 )
     * Clearing the shared pointer means that there is no
@@ -56,7 +59,10 @@ configSetup( Config::ConfigDescription& config,
              const String& prefix )
    //IterativeSolver< RealType, IndexType >::configSetup( config, prefix );
-   config.addEntry< int >( prefix + "gmres-restarting", "Number of iterations after which the GMRES restarts.", 10 );
+   config.addEntry< int >( prefix + "gmres-restarting-min", "Minimal number of iterations after which the GMRES restarts.", 10 );
+   config.addEntry< int >( prefix + "gmres-restarting-max", "Maximal number of iterations after which the GMRES restarts.", 10 );
+   config.addEntry< int >( prefix + "gmres-restarting-step-min", "Minimal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
+   config.addEntry< int >( prefix + "gmres-restarting-step-max", "Maximal adjusting step for the adaptivity of the GMRES restarting parameter.", 3 );
 template< typename Matrix,
@@ -67,7 +73,10 @@ setup( const Config::ParameterContainer& parameters,
        const String& prefix )
    IterativeSolver< RealType, IndexType >::setup( parameters, prefix );
-   this->setRestarting( parameters.getParameter< int >( "gmres-restarting" ) );
+   restarting_min = parameters.getParameter< int >( "gmres-restarting-min" );
+   this->setRestarting( parameters.getParameter< int >( "gmres-restarting-max" ) );
+   restarting_step_min = parameters.getParameter< int >( "gmres-restarting-step-min" );
+   restarting_step_max = parameters.getParameter< int >( "gmres-restarting-step-max" );
    return true;
@@ -79,7 +88,7 @@ setRestarting( IndexType rest )
    if( size != 0 )
       setSize( size, rest );
-   restarting = rest;
+   restarting_max = rest;
 template< typename Matrix,
@@ -107,18 +116,20 @@ bool
 GMRES< Matrix, Preconditioner >::
 solve( const Vector& b, Vector& x )
-   TNL_ASSERT( matrix, std::cerr << "No matrix was set in GMRES. Call setMatrix() before solve()." << std::endl );
-   if( restarting <= 0 )
+   TNL_ASSERT_TRUE( matrix, "No matrix was set in GMRES. Call setMatrix() before solve()." );
+   if( restarting_min <= 0 || restarting_max <= 0 || restarting_min > restarting_max )
-      std::cerr << "I have wrong value for the restarting of the GMRES solver. It is set to " << restarting
-           << ". Please set some positive value using the SetRestarting method." << std::endl;
+      std::cerr << "Wrong value for the GMRES restarting parameters: r_min = " << restarting_min
+                << ", r_max = " << restarting_max << std::endl;
       return false;
-   if( ! setSize( matrix -> getRows(), restarting ) )
+   if( restarting_step_min < 0 || restarting_step_max < 0 || restarting_step_min > restarting_step_max )
-       std::cerr << "I am not able to allocate enough memory for the GMRES solver. You may try to decrease the restarting parameter." << std::endl;
-       return false;
+      std::cerr << "Wrong value for the GMRES restarting adjustment parameters: d_min = " << restarting_step_min
+                << ", d_max = " << restarting_step_max << std::endl;
+      return false;
+   setSize( matrix -> getRows(), restarting_max );
    IndexType _size = size;
@@ -161,10 +172,38 @@ solve( const Vector& b, Vector& x )
    this->setResidue( beta / normb );
+   // parameters for the adaptivity of the restarting parameter
+         RealType beta_ratio = 1;           // = beta / beta_ratio (small value indicates good convergence rate)
+   const RealType max_beta_ratio = 0.99;    // = cos(8°) \approx 0.99
+   const RealType min_beta_ratio = 0.175;   // = cos(80°) \approx 0.175
+         int restart_cycles = 0;    // counter of restart cycles
+         int m = restarting_max;    // current restarting parameter
    Containers::Vector< RealType, DeviceType, IndexType > vi, vk;
    while( this->checkNextIteration() )
-      const IndexType m = restarting;
+      // adaptivity of the restarting parameter
+      // reference:  A.H. Baker, E.R. Jessup, Tz.V. Kolev - A simple strategy for varying the restart parameter in GMRES(m)
+      //             http://www.sciencedirect.com/science/article/pii/S0377042709000132
+      if( restarting_max > restarting_min && restart_cycles > 0 ) {
+         if( beta_ratio > max_beta_ratio )
+            // near stagnation -> set maximum
+            m = restarting_max;
+         else if( beta_ratio >= min_beta_ratio ) {
+            // the step size is determined based on current m using linear interpolation
+            // between restarting_step_min and restarting_step_max
+            const int step = restarting_step_min + (float) ( restarting_step_max - restarting_step_min ) /
+                                                           ( restarting_max - restarting_min ) *
+                                                           ( m - restarting_min );
+            if( m - step >= restarting_min )
+               m -= step;
+            else
+               // set restarting_max when we hit restarting_min (see Baker et al. (2009))
+               m = restarting_max;
+         }
+//         std::cerr << "restarting: cycle = " << restart_cycles << ", beta_ratio = " << beta_ratio << ", m = " << m << "    " << std::endl;
+      }
       for( IndexType i = 0; i < m + 1; i ++ )
          H[ i ] = s[ i ] = cs[ i ] = sn[ i ] = 0.0;
@@ -279,6 +318,7 @@ solve( const Vector& b, Vector& x )
        * r = M.solve(b - A * x);
+      const RealType beta_old = beta;
       beta = 0.0;
       if( preconditioner )
@@ -299,6 +339,9 @@ solve( const Vector& b, Vector& x )
       //cout << " beta = " << beta << std::endl;
       //cout << "residue = " << beta / normb << std::endl;
+      // update parameters for the adaptivity of the restarting parameter
+      ++restart_cycles;
+      beta_ratio = beta / beta_old;
    this->refreshSolverMonitor( true );
    return this->checkConvergence();
@@ -385,26 +428,22 @@ applyPlaneRotation( RealType& dx,
 template< typename Matrix,
           typename Preconditioner >
 GMRES< Matrix, Preconditioner >::
 setSize( IndexType _size, IndexType m )
-   if( size == _size && restarting == m ) return true;
+   if( size == _size && restarting_max == m )
+      return;
    size = _size;
-   restarting = m;
-   if( ! _r.setSize( size ) ||
-       ! w.setSize( size ) ||
-       ! _s.setSize( restarting + 1 ) ||
-       ! _cs.setSize( restarting + 1 ) ||
-       ! _sn.setSize( restarting + 1 ) ||
-       ! _v.setSize( size * ( restarting + 1 ) ) ||
-       ! _H.setSize( ( restarting + 1 ) * restarting ) ||
-       ! _M_tmp.setSize( size ) )
-   {
-      std::cerr << "I could not allocate all supporting arrays for the GMRES solver." << std::endl;
-      return false;
-   }
-   return true;
+   restarting_max = m;
+   _r.setSize( size );
+   w.setSize( size );
+   _s.setSize( m + 1 );
+   _cs.setSize( m + 1 );
+   _sn.setSize( m + 1 );
+   _v.setSize( size * ( m + 1 ) );
+   _H.setSize( ( m + 1 ) * m );
+   _M_tmp.setSize( size );
 } // namespace Linear
diff --git a/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h b/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h
index ccd70971c4a9937043f87a2c3d3005f049098b25..db81e861f7cd850ed151e305eb81c6915def7f4f 100644
--- a/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h
+++ b/src/TNL/Solvers/Linear/LinearResidueGetter_impl.h
@@ -10,6 +10,10 @@
 #pragma once
+#include <cmath>
+#include <TNL/Solvers/Linear/LinearResidueGetter.h>
 namespace TNL {
 namespace Solvers {
 namespace Linear {   
diff --git a/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt b/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt
old mode 100755
new mode 100644
index d14c10ad596625ba7f069f1697ec0737b8622212..43541c7ec74936497212c8436d95a2cbe03c3fcd
--- a/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt
+++ b/src/TNL/Solvers/Linear/Preconditioners/CMakeLists.txt
@@ -1,6 +1,8 @@
 SET( headers Dummy.h
+             ILU0.h
+             ILU0_impl.h
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/TNL/Solvers/Linear/Preconditioners )
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
index 991b6a4858f9dcb7f58359835964d12d91c09c43..0c77239443c054c4429416c5c654ab53d8419a9e 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h
@@ -57,7 +57,8 @@ update( const MatrixPointer& matrix )
 //  std::cout << getType() << "->setMatrix()" << std::endl;
-   TNL_ASSERT( matrix->getRows() > 0 && matrix->getRows() == matrix->getColumns(), );
+   TNL_ASSERT_GT( matrix->getRows(), 0, "empty matrix" );
+   TNL_ASSERT_EQ( matrix->getRows(), matrix->getColumns(), "matrix must be square" );
    if( diagonal.getSize() != matrix->getRows() )
       diagonal.setSize( matrix->getRows() );
@@ -81,7 +82,7 @@ update( const MatrixPointer& matrix )
             &matrix.template getData< Devices::Cuda >(),
             size );
-      checkCudaDevice;
@@ -112,7 +113,7 @@ solve( const Vector1& b, Vector2& x ) const
             size );
-      checkCudaDevice;
    return true;
diff --git a/src/TNL/Solvers/Linear/Preconditioners/Dummy.h b/src/TNL/Solvers/Linear/Preconditioners/Dummy.h
index 5629568c78b0bf1571c6a2d25a46d4bdc0f3199e..2a7283b22bd31f881b2d8a320b09ddc50b3bbe8a 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/Dummy.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/Dummy.h
@@ -29,8 +29,7 @@ class Dummy
    template< typename Vector1, typename Vector2 >
    bool solve( const Vector1& b, Vector2& x ) const
-      TNL_ASSERT( false,
-              std::cerr << "The solve() method of a dummy preconditioner should not be called." << std::endl; );
+      TNL_ASSERT_TRUE( false, "The solve() method of a dummy preconditioner should not be called." );
       return true;
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
new file mode 100644
index 0000000000000000000000000000000000000000..651f36b73fc3f8773d2063ae7d0dec53f96f82f5
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
@@ -0,0 +1,206 @@
+                          ILU0.h  -  description
+                             -------------------
+    begin                : Dec 24, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include <type_traits>
+#include <TNL/Object.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/CSR.h>
+#ifdef HAVE_CUDA
+#include <cusparse.h>
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+namespace Preconditioners {
+template< typename Real, typename Device, typename Index >
+class ILU0
+template< typename Real, typename Index >
+class ILU0< Real, Devices::Host, Index >
+   typedef Real RealType;
+   typedef Devices::Host DeviceType;
+   typedef Index IndexType;
+   template< typename MatrixPointer >
+   void update( const MatrixPointer& matrixPointer );
+   template< typename Vector1, typename Vector2 >
+   bool solve( const Vector1& b, Vector2& x ) const;
+   String getType() const
+   {
+      return String( "ILU0" );
+   }
+//   Matrices::CSR< RealType, DeviceType, IndexType > A;
+   Matrices::CSR< RealType, DeviceType, IndexType > L;
+   Matrices::CSR< RealType, DeviceType, IndexType > U;
+class ILU0< double, Devices::Cuda, int >
+   using RealType = double;
+   using DeviceType = Devices::Cuda;
+   using IndexType = int;
+   ILU0()
+   {
+#ifdef HAVE_CUDA
+      cusparseCreate( &handle );
+   }
+   template< typename MatrixPointer >
+   void update( const MatrixPointer& matrixPointer );
+   template< typename Vector1, typename Vector2 >
+   bool solve( const Vector1& b, Vector2& x ) const;
+   String getType() const
+   {
+      return String( "ILU0" );
+   }
+   ~ILU0()
+   {
+#ifdef HAVE_CUDA
+      resetMatrices();
+      cusparseDestroy( handle );
+   }
+#ifdef HAVE_CUDA
+   Matrices::CSR< RealType, DeviceType, IndexType > A;
+   Containers::Vector< RealType, DeviceType, IndexType > y;
+   cusparseHandle_t handle;
+   cusparseMatDescr_t descr_A = 0;
+   cusparseMatDescr_t descr_L = 0;
+   cusparseMatDescr_t descr_U = 0;
+   csrilu02Info_t     info_A  = 0;
+   csrsv2Info_t       info_L  = 0;
+   csrsv2Info_t       info_U  = 0;
+   const cusparseSolvePolicy_t policy_A = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
+   const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
+   const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
+   const cusparseOperation_t trans_L  = CUSPARSE_OPERATION_NON_TRANSPOSE;
+   const cusparseOperation_t trans_U  = CUSPARSE_OPERATION_NON_TRANSPOSE;
+   Containers::Array< char, DeviceType, int > pBuffer;
+   // scaling factor for triangular solves
+   const double alpha = 1.0;
+   void resetMatrices()
+   {
+      if( descr_A ) {
+         cusparseDestroyMatDescr( descr_A );
+         descr_A = 0;
+      }
+      if( descr_L ) {
+         cusparseDestroyMatDescr( descr_L );
+         descr_L = 0;
+      }
+      if( descr_U ) {
+         cusparseDestroyMatDescr( descr_U );
+         descr_U = 0;
+      }
+      if( info_A ) {
+         cusparseDestroyCsrilu02Info( info_A );
+         info_A = 0;
+      }
+      if( info_L ) {
+         cusparseDestroyCsrsv2Info( info_L );
+         info_L = 0;
+      }
+      if( info_U ) {
+         cusparseDestroyCsrsv2Info( info_U );
+         info_U = 0;
+      }
+      pBuffer.reset();
+   }
+   // TODO: extend Matrices::copySparseMatrix accordingly
+   template< typename Matrix,
+             typename = typename std::enable_if< ! std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type >
+   void copyMatrix( const Matrix& matrix )
+   {
+      typename Matrix::CudaType A_tmp;
+      A_tmp = matrix;
+      Matrices::copySparseMatrix( A, A_tmp );
+   }
+   template< typename Matrix,
+             typename = typename std::enable_if< std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type,
+             typename = void >
+   void copyMatrix( const Matrix& matrix )
+   {
+      Matrices::copySparseMatrix( A, matrix );
+   }
+#ifdef HAVE_MIC
+template< typename Real, typename Index >
+class ILU0< Real, Devices::MIC, Index >
+   typedef Real RealType;
+   typedef Devices::MIC DeviceType;
+   typedef Index IndexType;
+   template< typename MatrixPointer >
+   void update( const MatrixPointer& matrixPointer )
+   {
+      throw std::runtime_error("Not Iplemented yet for MIC");
+   }
+   template< typename Vector1, typename Vector2 >
+   bool solve( const Vector1& b, Vector2& x ) const
+   {
+      throw std::runtime_error("Not Iplemented yet for MIC");
+   }
+   String getType() const
+   {
+      return String( "ILU0" );
+   }
+//   Matrices::CSR< RealType, DeviceType, IndexType > A;
+   Matrices::CSR< RealType, DeviceType, IndexType > L;
+   Matrices::CSR< RealType, DeviceType, IndexType > U;
+} // namespace Preconditioners
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
+#include <TNL/Solvers/Linear/Preconditioners/ILU0_impl.h>
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..58d1c4ad8c9cee8572e05e83de81105b5a96086f
--- /dev/null
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h
@@ -0,0 +1,300 @@
+                          ILU0_impl.h  -  description
+                             -------------------
+    begin                : Dec 24, 2016
+    copyright            : (C) 2016 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+// Implemented by: Jakub Klinkovsky
+#pragma once
+#include "ILU0.h"
+#include <TNL/Exceptions/CudaSupportMissing.h>
+namespace TNL {
+namespace Solvers {
+namespace Linear {
+namespace Preconditioners {   
+template< typename Real, typename Index >
+   template< typename MatrixPointer >
+ILU0< Real, Devices::Host, Index >::
+update( const MatrixPointer& matrixPointer )
+   TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
+   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );
+   const IndexType N = matrixPointer->getRows();
+   L.setDimensions( N, N );
+   U.setDimensions( N, N );
+   // copy row lengths
+   typename decltype(L)::CompressedRowLengthsVector L_rowLengths;
+   typename decltype(U)::CompressedRowLengthsVector U_rowLengths;
+   L_rowLengths.setSize( N );
+   U_rowLengths.setSize( N );
+   for( IndexType i = 0; i < N; i++ ) {
+       const auto row = matrixPointer->getRow( i );
+       const auto max_length = matrixPointer->getRowLength( i );
+       IndexType L_entries = 0;
+       IndexType U_entries = 0;
+       for( IndexType j = 0; j < max_length; j++ ) {
+           const auto column = row.getElementColumn( j );
+           if( column < i )
+               L_entries++;
+           else if( column < N )
+              U_entries++;
+           else
+               break;
+       }
+      L_rowLengths[ i ] = L_entries;
+      U_rowLengths[ N - 1 - i ] = U_entries;
+   }
+   L.setCompressedRowLengths( L_rowLengths );
+   U.setCompressedRowLengths( U_rowLengths );
+   // Incomplete LU factorization
+   // The factors L and U are stored separately and the rows of U are reversed.
+   for( IndexType i = 0; i < N; i++ ) {
+      // copy all non-zero entries from A into L and U
+      const auto max_length = matrixPointer->getRowLength( i );
+      IndexType columns[ max_length ];
+      RealType values[ max_length ];
+      matrixPointer->getRowFast( i, columns, values );
+      const auto L_entries = L_rowLengths[ i ];
+      const auto U_entries = U_rowLengths[ N - 1 - i ];
+      L.setRow( i, columns, values, L_entries );
+      U.setRow( N - 1 - i, &columns[ L_entries ], &values[ L_entries ], U_entries );
+      // this condition is to avoid segfaults on empty L.getRow( i )
+      if( L_entries > 0 ) {
+         const auto L_i = L.getRow( i );
+         const auto U_i = U.getRow( N - 1 - i );
+         // loop for k = 0, ..., i - 2; but only over the non-zero entries
+         for( IndexType c_k = 0; c_k < L_entries; c_k++ ) {
+            const auto k = L_i.getElementColumn( c_k );
+            auto L_ik = L.getElementFast( i, k ) / U.getElementFast( N - 1 - k, k );
+            L.setElement( i, k, L_ik );
+            // loop for j = k+1, ..., N-1; but only over the non-zero entries
+            // and split into two loops over L and U separately
+            for( IndexType c_j = c_k + 1; c_j < L_entries; c_j++ ) {
+               const auto j = L_i.getElementColumn( c_j );
+               const auto L_ij = L.getElementFast( i, j ) - L_ik * U.getElementFast( N - 1 - k, j );
+               L.setElement( i, j, L_ij );
+            }
+            for( IndexType c_j = 0; c_j < U_entries; c_j++ ) {
+               const auto j = U_i.getElementColumn( c_j );
+               const auto U_ij = U.getElementFast( N - 1 - i, j ) - L_ik * U.getElementFast( N - 1 - k, j );
+               U.setElement( N - 1 - i, j, U_ij );
+            }
+         }
+      }
+   }
+template< typename Real, typename Index >
+   template< typename Vector1, typename Vector2 >
+ILU0< Real, Devices::Host, Index >::
+solve( const Vector1& b, Vector2& x ) const
+   TNL_ASSERT_EQ( b.getSize(), L.getRows(), "wrong size of the right hand side" );
+   TNL_ASSERT_EQ( x.getSize(), L.getRows(), "wrong size of the solution vector" );
+   const IndexType N = x.getSize();
+   // Step 1: solve y from Ly = b
+   for( IndexType i = 0; i < N; i++ ) {
+      x[ i ] = b[ i ];
+      const auto L_entries = L.getRowLength( i );
+      // this condition is to avoid segfaults on empty L.getRow( i )
+      if( L_entries > 0 ) {
+         const auto L_i = L.getRow( i );
+         // loop for j = 0, ..., i - 1; but only over the non-zero entries
+         for( IndexType c_j = 0; c_j < L_entries; c_j++ ) {
+            const auto j = L_i.getElementColumn( c_j );
+            x[ i ] -= L_i.getElementValue( c_j ) * x[ j ];
+         }
+      }
+   }
+   // Step 2: solve x from Ux = y
+   for( IndexType i = N - 1; i >= 0; i-- ) {
+      const IndexType U_idx = N - 1 - i;
+      const auto U_entries = U.getRowLength( U_idx );
+      const auto U_i = U.getRow( U_idx );
+      const auto U_ii = U_i.getElementValue( 0 );
+      // loop for j = i+1, ..., N-1; but only over the non-zero entries
+      for( IndexType c_j = 1; c_j < U_entries ; c_j++ ) {
+         const auto j = U_i.getElementColumn( c_j );
+         x[ i ] -= U_i.getElementValue( c_j ) * x[ j ];
+      }
+      x[ i ] /= U_ii;
+   }
+   return true;
+   template< typename MatrixPointer >
+ILU0< double, Devices::Cuda, int >::
+update( const MatrixPointer& matrixPointer )
+#ifdef HAVE_CUDA
+   // TODO: only numerical factorization has to be done every time, split the rest into separate "setup" method which is called less often
+   resetMatrices();
+   // Note: the decomposition will be in-place, matrices L and U will have the
+   // storage of A
+   copyMatrix( *matrixPointer );
+   const int m = A.getRows();
+   const int nnz = A.getValues().getSize();
+   y.setSize( m );
+   // create matrix descriptors
+   cusparseCreateMatDescr( &descr_A );
+   cusparseSetMatIndexBase( descr_A, CUSPARSE_INDEX_BASE_ZERO );
+   cusparseSetMatType( descr_A, CUSPARSE_MATRIX_TYPE_GENERAL );
+   cusparseCreateMatDescr( &descr_L );
+   cusparseSetMatIndexBase( descr_L, CUSPARSE_INDEX_BASE_ZERO );
+   cusparseSetMatType( descr_L, CUSPARSE_MATRIX_TYPE_GENERAL );
+   cusparseSetMatFillMode( descr_L, CUSPARSE_FILL_MODE_LOWER );
+   cusparseSetMatDiagType( descr_L, CUSPARSE_DIAG_TYPE_UNIT );
+   cusparseCreateMatDescr( &descr_U);
+   cusparseSetMatIndexBase( descr_U, CUSPARSE_INDEX_BASE_ZERO );
+   cusparseSetMatType( descr_U, CUSPARSE_MATRIX_TYPE_GENERAL );
+   cusparseSetMatFillMode( descr_U, CUSPARSE_FILL_MODE_UPPER );
+   cusparseSetMatDiagType( descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT );
+   // create info structures
+   cusparseCreateCsrilu02Info( &info_A );
+   cusparseCreateCsrsv2Info( &info_L );
+   cusparseCreateCsrsv2Info( &info_U );
+   // query how much memory will be needed in csrilu02 and csrsv2, and allocate the buffer
+   int pBufferSize_A, pBufferSize_L, pBufferSize_U;
+   cusparseDcsrilu02_bufferSize( handle, m, nnz, descr_A,
+                                 A.getValues().getData(),
+                                 A.getRowPointers().getData(),
+                                 A.getColumnIndexes().getData(),
+                                 info_A, &pBufferSize_A );
+   cusparseDcsrsv2_bufferSize( handle, trans_L, m, nnz, descr_L,
+                               A.getValues().getData(),
+                               A.getRowPointers().getData(),
+                               A.getColumnIndexes().getData(),
+                               info_L, &pBufferSize_L );
+   cusparseDcsrsv2_bufferSize( handle, trans_U, m, nnz, descr_U,
+                               A.getValues().getData(),
+                               A.getRowPointers().getData(),
+                               A.getColumnIndexes().getData(),
+                               info_U, &pBufferSize_U );
+   const int pBufferSize = max( pBufferSize_A, max( pBufferSize_L, pBufferSize_U ) );
+   pBuffer.setSize( pBufferSize );
+   // Symbolic analysis of the incomplete LU decomposition
+   cusparseDcsrilu02_analysis( handle, m, nnz, descr_A,
+                               A.getValues().getData(),
+                               A.getRowPointers().getData(),
+                               A.getColumnIndexes().getData(),
+                               info_A, policy_A, pBuffer.getData() );
+   int structural_zero;
+   cusparseStatus_t
+   status = cusparseXcsrilu02_zeroPivot( handle, info_A, &structural_zero );
+   if( CUSPARSE_STATUS_ZERO_PIVOT == status ) {
+      std::cerr << "A(" << structural_zero << ", " << structural_zero << ") is missing." << std::endl;
+      throw 1;
+   }
+   // Analysis for the triangular solves for L and U
+   // Trick: the lower (upper) triangular part of A has the same sparsity
+   // pattern as L (U), so we can do the analysis for csrsv2 on the matrix A.
+   cusparseDcsrsv2_analysis( handle, trans_L, m, nnz, descr_L,
+                             A.getValues().getData(),
+                             A.getRowPointers().getData(),
+                             A.getColumnIndexes().getData(),
+                             info_L, policy_L, pBuffer.getData() );
+   cusparseDcsrsv2_analysis( handle, trans_U, m, nnz, descr_U,
+                             A.getValues().getData(),
+                             A.getRowPointers().getData(),
+                             A.getColumnIndexes().getData(),
+                             info_U, policy_U, pBuffer.getData() );
+   // Numerical incomplete LU decomposition
+   cusparseDcsrilu02( handle, m, nnz, descr_A,
+                      A.getValues().getData(),
+                      A.getRowPointers().getData(),
+                      A.getColumnIndexes().getData(),
+                      info_A, policy_A, pBuffer.getData() );
+   int numerical_zero;
+   status = cusparseXcsrilu02_zeroPivot( handle, info_A, &numerical_zero );
+   if( CUSPARSE_STATUS_ZERO_PIVOT == status ) {
+      std::cerr << "A(" << numerical_zero << ", " << numerical_zero << ") is zero." << std::endl;
+      throw 1;
+   }
+   throw Exceptions::CudaSupportMissing();
+   template< typename Vector1, typename Vector2 >
+ILU0< double, Devices::Cuda, int >::
+solve( const Vector1& b, Vector2& x ) const
+#ifdef HAVE_CUDA
+   const int m = A.getRows();
+   const int nnz = A.getValues().getSize();
+   // Step 1: solve y from Ly = b
+   cusparseDcsrsv2_solve( handle, trans_L, m, nnz, &alpha, descr_L,
+                          A.getValues().getData(),
+                          A.getRowPointers().getData(),
+                          A.getColumnIndexes().getData(),
+                          info_L,
+                          b.getData(),
+                          (RealType*) y.getData(),
+                          policy_L, (void*) pBuffer.getData() );
+   // Step 2: solve x from Ux = y
+   cusparseDcsrsv2_solve( handle, trans_U, m, nnz, &alpha, descr_U,
+                          A.getValues().getData(),
+                          A.getRowPointers().getData(),
+                          A.getColumnIndexes().getData(),
+                          info_U,
+                          y.getData(),
+                          x.getData(),
+                          policy_U, (void*) pBuffer.getData() );
+   return true;
+   throw Exceptions::CudaSupportMissing();
+} // namespace Preconditioners
+} // namespace Linear
+} // namespace Solvers
+} // namespace TNL
diff --git a/src/TNL/Solvers/Linear/SOR.h b/src/TNL/Solvers/Linear/SOR.h
index d1159884c1dc98883d782db832c936da71e03186..fe36b65932aa0e4eed149a3e1f46a6acc9560c73 100644
--- a/src/TNL/Solvers/Linear/SOR.h
+++ b/src/TNL/Solvers/Linear/SOR.h
@@ -10,7 +10,6 @@
 #pragma once
-#include <math.h>
 #include <TNL/Object.h>
 #include <TNL/SharedPointer.h>
 #include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
@@ -61,8 +60,6 @@ class SOR : public Object,
              typename ResidueGetter = LinearResidueGetter< Matrix, Vector >  >
    bool solve( const Vector& b, Vector& x );
-   ~SOR();
    RealType omega;
@@ -76,4 +73,3 @@ class SOR : public Object,
 } // namespace TNL
 #include <TNL/Solvers/Linear/SOR_impl.h>
diff --git a/src/TNL/Solvers/Linear/SOR_impl.cpp b/src/TNL/Solvers/Linear/SOR_impl.cpp
index 294b819bcaa73724f0b289f7914fa7e1e2171084..4f4231e18cb7bb517ef09abfe57213e3fe924eda 100644
--- a/src/TNL/Solvers/Linear/SOR_impl.cpp
+++ b/src/TNL/Solvers/Linear/SOR_impl.cpp
@@ -8,6 +8,8 @@
 /* See Copyright Notice in tnl/Copyright */
 #include <TNL/Solvers/Linear/SOR.h>
 #include <TNL/Matrices/CSR.h>
 #include <TNL/Matrices/Ellpack.h>
@@ -54,4 +56,4 @@ template class SOR< Matrices::Multidiagonal< double, Devices::Cuda, long int > >
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/Linear/SOR_impl.h b/src/TNL/Solvers/Linear/SOR_impl.h
index d7d883f161342a4311caf8a7a99411fc36683d17..509221475dfc2b69c33198cb361fe51f7542a566 100644
--- a/src/TNL/Solvers/Linear/SOR_impl.h
+++ b/src/TNL/Solvers/Linear/SOR_impl.h
@@ -10,6 +10,8 @@
 #pragma once
+#include <TNL/Solvers/Linear/SOR.h>
 namespace TNL {
 namespace Solvers {
 namespace Linear {   
@@ -106,6 +108,7 @@ bool SOR< Matrix, Preconditioner > :: solve( const Vector& b, Vector& x )
                                       this->getOmega() );
+      // FIXME: the LinearResidueGetter works only on the host
       this->setResidue( ResidueGetter::getResidue( *matrix, x, b, bNorm ) );
@@ -114,15 +117,13 @@ bool SOR< Matrix, Preconditioner > :: solve( const Vector& b, Vector& x )
    return this->checkConvergence();
-template< typename Matrix, typename Preconditioner >
-SOR< Matrix, Preconditioner > :: ~SOR()
 } // namespace Linear
 } // namespace Solvers
 } // namespace TNL
 #include <TNL/Matrices/CSR.h>
 #include <TNL/Matrices/Ellpack.h>
 #include <TNL/Matrices/Multidiagonal.h>
@@ -175,3 +176,5 @@ extern template class SOR< tnlMutliDiagonalMatrix< double, Devices::Cuda, long i
 } // namespace Linear
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/Linear/TFQMR.h b/src/TNL/Solvers/Linear/TFQMR.h
index ec2ae65eeb927323f73c33e60057be73e6d8785b..abf37d737298e27395c1dbb4015f398582cf17a2 100644
--- a/src/TNL/Solvers/Linear/TFQMR.h
+++ b/src/TNL/Solvers/Linear/TFQMR.h
@@ -14,7 +14,6 @@
 #include <TNL/Object.h>
 #include <TNL/SharedPointer.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/SharedVector.h>
 #include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
 #include <TNL/Solvers/IterativeSolver.h>
 #include <TNL/Solvers/Linear/LinearResidueGetter.h>
@@ -62,7 +61,7 @@ class TFQMR : public Object,
-   bool setSize( IndexType size );
+   void setSize( IndexType size );
    Containers::Vector< RealType, DeviceType, IndexType > d, r, w, u, v, r_ast, Au, M_tmp;
diff --git a/src/TNL/Solvers/Linear/TFQMR_impl.h b/src/TNL/Solvers/Linear/TFQMR_impl.h
index 7fb2b7aa98b1d02a519e248a6a6ec6f35eef9a0c..7f6546a75caa6fe1dc4c09de47a694b46a4f02df 100644
--- a/src/TNL/Solvers/Linear/TFQMR_impl.h
+++ b/src/TNL/Solvers/Linear/TFQMR_impl.h
@@ -76,8 +76,7 @@ template< typename Matrix,
    template< typename Vector, typename ResidueGetter >
 bool TFQMR< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
-   if( ! this->setSize( matrix -> getRows() ) )
-      return false;
+   this->setSize( matrix -> getRows() );
    RealType tau, theta, eta, rho, alpha, b_norm, w_norm;
@@ -174,32 +173,25 @@ bool TFQMR< Matrix, Preconditioner >::solve( const Vector& b, Vector& x )
    this->refreshSolverMonitor( true );
    return this->checkConvergence();
 template< typename Matrix,
           typename Preconditioner >
-bool TFQMR< Matrix, Preconditioner > :: setSize( IndexType size )
+void TFQMR< Matrix, Preconditioner > :: setSize( IndexType size )
    if( this->size == size )
-      return true;
+      return;
    this->size = size;
-   if( ! d. setSize( size ) ||
-       ! r. setSize( size ) ||
-       ! w. setSize( size ) ||
-       ! u. setSize( size ) ||
-       ! v. setSize( size ) ||
-       ! r_ast. setSize( size ) ||
-       ! Au. setSize( size ) ||
-       ! M_tmp. setSize( size ) )
-   {
-      std::cerr << "I am not able to allocate all supporting vectors for the TFQMR solver." << std::endl;
-      return false;
-   }
-   return true;
+   d.setSize( size );
+   r.setSize( size );
+   w.setSize( size );
+   u.setSize( size );
+   v.setSize( size );
+   r_ast.setSize( size );
+   Au.setSize( size );
+   M_tmp.setSize( size );
 } // namespace Linear
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h b/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h
index a07143a573900b4369cae396e43ea4b1a0797cc4..ebc401d633e9e2792bcff1b16ebcb9bc88646a09 100644
--- a/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h
+++ b/src/TNL/Solvers/Linear/UmfpackWrapper_impl.h
@@ -63,8 +63,9 @@ bool UmfpackWrapper< Matrices::CSR< double, Devices::Host, int >, Preconditioner
 solve( const Vector& b,
        Vector& x )
-    TNL_ASSERT( matrix->getRows() == matrix->getColumns(), );
-    TNL_ASSERT( matrix->getColumns() == x.getSize() && matrix->getColumns() == b.getSize(), );
+    TNL_ASSERT_EQ( matrix->getRows(), matrix->getColumns(), "matrix must be square" );
+    TNL_ASSERT_EQ( matrix->getColumns(), x.getSize(), "wrong size of the solution vector" );
+    TNL_ASSERT_EQ( matrix->getColumns(), b.getSize(), "wrong size of the right hand side" );
     const IndexType size = matrix -> getRows();
@@ -87,9 +88,9 @@ solve( const Vector& b,
     // symbolic reordering of the sparse matrix
     status = umfpack_di_symbolic( size, size,
-                                  matrix->getRowPointers(),
-                                  matrix->getColumnIndexes(),
-                                  matrix->getValues(),
+                                  matrix->getRowPointers().getData(),
+                                  matrix->getColumnIndexes().getData(),
+                                  matrix->getValues().getData(),
                                   &Symbolic, Control, Info );
     if( status != UMFPACK_OK ) {
         std::cerr << "error: symbolic reordering failed" << std::endl;
@@ -97,9 +98,9 @@ solve( const Vector& b,
     // numeric factorization
-    status = umfpack_di_numeric( matrix->getRowPointers(),
-                                 matrix->getColumnIndexes(),
-                                 matrix->getValues(),
+    status = umfpack_di_numeric( matrix->getRowPointers().getData(),
+                                 matrix->getColumnIndexes().getData(),
+                                 matrix->getValues().getData(),
                                  Symbolic, &Numeric, Control, Info );
     if( status != UMFPACK_OK ) {
         std::cerr << "error: numeric factorization failed" << std::endl;
@@ -108,9 +109,9 @@ solve( const Vector& b,
     // solve with specified right-hand-side
     status = umfpack_di_solve( system_type,
-                               matrix->getRowPointers(),
-                               matrix->getColumnIndexes(),
-                               matrix->getValues(),
+                               matrix->getRowPointers().getData(),
+                               matrix->getColumnIndexes().getData(),
+                               matrix->getValues().getData(),
                                Numeric, Control, Info );
diff --git a/src/TNL/Solvers/ODE/CMakeLists.txt b/src/TNL/Solvers/ODE/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Solvers/ODE/Euler_impl.h b/src/TNL/Solvers/ODE/Euler_impl.h
index ce90e16ec77286be4ca6c8187d14ebc51e6031e0..42f563b11438bce4cc7c625a011e134d59d27c1b 100644
--- a/src/TNL/Solvers/ODE/Euler_impl.h
+++ b/src/TNL/Solvers/ODE/Euler_impl.h
@@ -10,6 +10,8 @@
 #pragma once
+#include <TNL/Devices/MIC.h>
 namespace TNL {
 namespace Solvers {
 namespace ODE {
@@ -74,11 +76,7 @@ bool Euler< Problem > :: solve( DofVectorPointer& u )
     * First setup the supporting meshes k1...k5 and k_tmp.
-   if( ! k1->setLike( *u ) )
-   {
-      std::cerr << "I do not have enough memory to allocate a supporting grid for the Euler explicit solver." << std::endl;
-      return false;
-   }
+   k1->setLike( *u );
    k1->setValue( 0.0 );
@@ -203,12 +201,38 @@ void Euler< Problem > :: computeNewTimeLevel( DofVectorPointer& u,
                                                                       this->cudaBlockResidue.getData() );
          localResidue += this->cudaBlockResidue.sum();
-         checkCudaDevice;
-   localResidue /= tau * ( RealType ) size;
+   //MIC
+   if( std::is_same< DeviceType, Devices::MIC >::value )
+   {
+#ifdef HAVE_MIC
+      Devices::MICHider<RealType> mu;
+      mu.pointer=_u;
+      Devices::MICHider<RealType> mk1;
+      mk1.pointer=_k1;
+    #pragma offload target(mic) in(mu,mk1,size) inout(localResidue)
+    {
+      #pragma omp parallel for reduction(+:localResidue) firstprivate( mu, mk1 )  
+      for( IndexType i = 0; i < size; i ++ )
+      {
+         const RealType add = tau * mk1.pointer[ i ];
+         mu.pointer[ i ] += add;
+         localResidue += std::fabs( add );
+      }
+    }
+   }
+   localResidue /= tau * ( RealType ) size;   
    MPIAllreduce( localResidue, currentResidue, 1, MPI_SUM, this->solver_comm );
 #ifdef HAVE_CUDA
@@ -220,7 +244,7 @@ __global__ void updateUEuler( const Index size,
                               RealType* cudaBlockResidue )
    extern __shared__ RealType du[];
-   const Index blockOffset = blockIdx. x * blockDim. x;
+   const Index blockOffset = blockIdx. x * blockDim.x;
    const Index i = blockOffset  + threadIdx. x;
    if( i < size )
       u[ i ] += du[ threadIdx.x ] = tau * k1[ i ];
diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h
index cb8a7808bfa71e3ec42d5c57d43827f76e376abd..d206087f1cc3d5ba78a888ef916b06f40e76235a 100644
--- a/src/TNL/Solvers/ODE/Merson_impl.h
+++ b/src/TNL/Solvers/ODE/Merson_impl.h
@@ -133,19 +133,20 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
       std::cerr << "No problem was set for the Merson ODE solver." << std::endl;
       return false;
-   /****
-    * First setup the supporting meshes k1...k5 and kAux.
-    */
-   if( ! k1->setLike( *u ) ||
-       ! k2->setLike( *u ) ||
-       ! k3->setLike( *u ) ||
-       ! k4->setLike( *u ) ||
-       ! k5->setLike( *u ) ||
-       ! kAux->setLike( *u ) )
+   if( this->getTau() == 0.0 )
-      std::cerr << "I do not have enough memory to allocate supporting grids for the Merson explicit solver." << std::endl;
+      std::cerr << "The time step for the Merson ODE solver is zero." << std::endl;
       return false;
+   /****
+    * First setup the supporting meshes k1...k5 and kAux.
+    */
+   k1->setLike( *u );
+   k2->setLike( *u );
+   k3->setLike( *u );
+   k4->setLike( *u );
+   k5->setLike( *u );
+   kAux->setLike( *u );
    k1->setValue( 0.0 );
    k2->setValue( 0.0 );
    k3->setValue( 0.0 );
@@ -254,21 +255,12 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u,
     * Compute data transfers statistics
-#ifdef HAVE_NOT_CXX11
-   k1->template touch< IndexType >( 4 );
-   k2->template touch< IndexType >( 1 );
-   k3->template touch< IndexType >( 2 );
-   k4->template touch< IndexType >( 1 );
-   kAux->template touch< IndexType >( 4 );
-   u->template touch< IndexType >( 4 );
    k1->touch( 4 );
    k2->touch( 1 );
    k3->touch( 2 );
    k4->touch( 1 );
    kAux->touch( 4 );
    u->touch( 4 );
    RealType tau_3 = tau / 3.0;
@@ -372,17 +364,10 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
     * Compute data transfers statistics
-#ifdef HAVE_NOT_CXX11
-   k1->template touch< IndexType >();
-   k3->template touch< IndexType >();
-   k4->template touch< IndexType >();
-   k5->template touch< IndexType >();
    RealType eps( 0.0 ), maxEps( 0.0 );
    if( std::is_same< DeviceType, Devices::Host >::value )
@@ -453,17 +438,10 @@ void Merson< Problem >::computeNewTimeLevel( DofVectorPointer& u,
     * Compute data transfers statistics
-#ifdef HAVE_NOT_CXX11
-   u->template touch< IndexType >();
-   k1->template touch< IndexType >();
-   k4->template touch< IndexType >();
-   k5->template touch< IndexType >();
    if( std::is_same< DeviceType, Devices::Host >::value )
diff --git a/src/TNL/Solvers/PDE/CMakeLists.txt b/src/TNL/Solvers/PDE/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
index dff5f2e6c53b79ee348391d702fb4d53e32ef102..a2450e38d2acdd331da85a59ef3ca639c022660d 100644
--- a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
+++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h
@@ -122,7 +122,7 @@ solve( const RealType& time,
        DofVectorPointer& dofVector,
        MeshDependentDataPointer& meshDependentData )
-   TNL_ASSERT( this->odeSolver, );
+   TNL_ASSERT_TRUE( this->odeSolver, "ODE solver was not set" );
    this->odeSolver->setTau( this->timeStep );
    this->odeSolver->setProblem( * this );
diff --git a/src/TNL/Solvers/PDE/ExplicitUpdater.h b/src/TNL/Solvers/PDE/ExplicitUpdater.h
index cc4056b70eeb082154dedbf2e2c6318a8949a77a..264ecef7c0a83a3f6ead920de11a0225fb61cd36 100644
--- a/src/TNL/Solvers/PDE/ExplicitUpdater.h
+++ b/src/TNL/Solvers/PDE/ExplicitUpdater.h
@@ -122,12 +122,12 @@ class ExplicitUpdater
                                                  typename MeshFunction::IndexType > >::value != true,
             "Error: I am getting Vector instead of MeshFunction or similar object. You might forget to bind DofVector into MeshFunction in you method getExplicitUpdate."  );
-         TNL_ASSERT( this->userDataPointer->differentialOperator, 
-            std::cerr << "The differential operator is not correctly set-up. Use method setDifferentialOperator() to do it." << std::endl );
-         TNL_ASSERT( this->userDataPointer->boundaryConditions, 
-            std::cerr << "The boundary conditions are not correctly set-up. Use method setBoundaryCondtions() to do it." << std::endl );
-         TNL_ASSERT( this->userDataPointer->rightHandSide, 
-            std::cerr << "The right-hand side is not correctly set-up. Use method setRightHandSide() to do it." << std::endl );
+         TNL_ASSERT_TRUE( this->userDataPointer->differentialOperator,
+                          "The differential operator is not correctly set-up. Use method setDifferentialOperator() to do it." );
+         TNL_ASSERT_TRUE( this->userDataPointer->boundaryConditions, 
+                          "The boundary conditions are not correctly set-up. Use method setBoundaryCondtions() to do it." );
+         TNL_ASSERT_TRUE( this->userDataPointer->rightHandSide, 
+                          "The right-hand side is not correctly set-up. Use method setRightHandSide() to do it." );
          this->userDataPointer->time = time;
@@ -175,12 +175,23 @@ class ExplicitUpdater
                                               TraverserUserData& userData,
                                               const EntityType& entity )
+           /*    std::cerr<<"===========================================================" << std::endl; 
+               std::cerr<<"fu:" << userData.fu << std::endl; 
+               std::cerr<< "diffOp:" << userData.differentialOperator << std::endl; 
+               std::cerr<<"===========================================================" << std::endl; 
+               std::cerr<<std::flush;*/
+            //   int blabla;
+             //  std::cin >> blabla; 
                ( *userData.fu )( entity ) = 
-                  ( *userData.differentialOperator )( *userData.u, entity, userData.time );
+                       ( *userData.differentialOperator )( *userData.u, entity, userData.time );
                typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter;
                (  *userData.fu )( entity ) += 
                   FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time );
diff --git a/src/TNL/Solvers/PDE/LinearSystemAssembler.h b/src/TNL/Solvers/PDE/LinearSystemAssembler.h
index 6b96f0b66cda28c970a38153e5ec637892746e88..d97dd15ef530ec33833d016d4ed321a83d711df6 100644
--- a/src/TNL/Solvers/PDE/LinearSystemAssembler.h
+++ b/src/TNL/Solvers/PDE/LinearSystemAssembler.h
@@ -114,7 +114,7 @@ class LinearSystemAssembler
       "Error: I am getting Vector instead of MeshFunction or similar object. You might forget to bind DofVector into MeshFunction in you method getExplicitUpdate."  );
       const IndexType maxRowLength = matrixPointer.template getData< Devices::Host >().getMaxRowLength();
-      TNL_ASSERT( maxRowLength > 0, );
+      TNL_ASSERT_GT( maxRowLength, 0, "maximum row length must be positive" );
       this->userDataPointer->time = time;
       this->userDataPointer->tau = tau;
       this->userDataPointer->u = &uPointer.template getData< DeviceType >();
diff --git a/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h
index 6e996c2828e8b63200cf031eb53bef5d1728e251..1c324f8f944d43ae73cea4036ae252768112bc32 100644
--- a/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h
+++ b/src/TNL/Solvers/PDE/SemiImplicitTimeStepper_impl.h
@@ -11,7 +11,8 @@
 #pragma once
 #include <TNL/Math.h>
-#include "SemiImplicitTimeStepper.h"
+#include <TNL/Solvers/PDE/SemiImplicitTimeStepper.h>
+#include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
 namespace TNL {
 namespace Solvers {
@@ -69,8 +70,7 @@ init( const MeshPointer& mesh )
       std::cerr << "Please check the method 'setupLinearSystem' in your solver." << std::endl;
       return false;
-   if( ! this->rightHandSidePointer->setSize( this->matrix.getData().getRows() ) )
-      return false;
+   this->rightHandSidePointer->setSize( this->matrix.getData().getRows() );
@@ -154,14 +154,15 @@ solve( const RealType& time,
        DofVectorPointer& dofVector,
        MeshDependentDataPointer& meshDependentData )
-   TNL_ASSERT( this->problem != 0, );
+   TNL_ASSERT_TRUE( this->problem, "problem was not set" );
    RealType t = time;
    this->linearSystemSolver->setMatrix( this->matrix );
    PreconditionerPointer preconditioner;
    Linear::Preconditioners::SolverStarterSolverPreconditionerSetter< LinearSystemSolverType, PreconditionerType >
        ::run( *(this->linearSystemSolver), preconditioner );
-   while( t < stopTime )
+   // ignore very small steps at the end, most likely caused by truncation errors
+   while( stopTime - t > this->timeStep * 1e-6 )
       RealType currentTau = min( this->timeStep, stopTime - t );
@@ -207,9 +208,11 @@ solve( const RealType& time,
-      if( ! this->linearSystemSolver->template solve< DofVectorType, Linear::LinearResidueGetter< MatrixType, DofVectorType > >( *this->rightHandSidePointer, *dofVector ) )
+      if( ! this->linearSystemSolver->solve( *this->rightHandSidePointer, *dofVector ) )
          std::cerr << std::endl << "The linear system solver did not converge." << std::endl;
+         // save the linear system for debugging
+         this->problem->saveFailedLinearSystem( *this->matrix, *dofVector, *this->rightHandSidePointer );
          return false;
diff --git a/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h b/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h
index 2a22201d1e54f2d75b809c5b2e914abb332bd390..6d20972d28c9f06cdf8da05e90567ac50f656e67 100644
--- a/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h
+++ b/src/TNL/Solvers/PDE/TimeDependentPDESolver_impl.h
@@ -80,15 +80,8 @@ setup( const Config::ParameterContainer& parameters,
     * Set DOFs (degrees of freedom)
-   TNL_ASSERT( problem->getDofs( this->meshPointer ) != 0, );
-   std::cout << "Allocating dofs ... ";
-   if( ! this->dofsPointer->setSize( problem->getDofs( this->meshPointer ) ) )
-   {
-      std::cerr << std::endl;
-      std::cerr << "I am not able to allocate DOFs (degrees of freedom)." << std::endl;
-      return false;
-   }
-   std::cout << " [ OK ]" << std::endl;
+   TNL_ASSERT_GT( problem->getDofs( this->meshPointer ), 0, "number of DOFs must be positive" );
+   this->dofsPointer->setSize( problem->getDofs( this->meshPointer ) );
    this->dofsPointer->setValue( 0.0 );
    this->problem->bindDofs( this->meshPointer, this->dofsPointer );
@@ -101,11 +94,11 @@ setup( const Config::ParameterContainer& parameters,
     * Set-up the initial condition
-  std::cout << "Setting up the initial condition ... ";
+   std::cout << "Setting up the initial condition ... ";
    typedef typename Problem :: DofVectorType DofVectorType;
    if( ! this->problem->setInitialCondition( parameters, meshPointer, this->dofsPointer, this->meshDependentDataPointer ) )
       return false;
-  std::cout << " [ OK ]" << std::endl;
+   std::cout << " [ OK ]" << std::endl;
     * Initialize the time discretisation
@@ -141,8 +134,12 @@ writeProlog( Logger& logger,
       logger.writeParameter< double >( "Adaptivity:", "merson-adaptivity", parameters, 1 );
    if( solverName == "sor" )
       logger.writeParameter< double >( "Omega:", "sor-omega", parameters, 1 );
-   if( solverName == "gmres" )
-      logger.writeParameter< int >( "Restarting:", "gmres-restarting", parameters, 1 );
+   if( solverName == "gmres" || solverName == "cwygmres" ) {
+      logger.writeParameter< int >( "Restarting min:", "gmres-restarting-min", parameters, 1 );
+      logger.writeParameter< int >( "Restarting max:", "gmres-restarting-max", parameters, 1 );
+      logger.writeParameter< int >( "Restarting step min:", "gmres-restarting-step-min", parameters, 1 );
+      logger.writeParameter< int >( "Restarting step max:", "gmres-restarting-step-max", parameters, 1 );
+   }
    logger.writeParameter< double >( "Convergence residue:", "convergence-residue", parameters );
    logger.writeParameter< double >( "Divergence residue:", "divergence-residue", parameters );
    logger.writeParameter< int >( "Maximal number of iterations:", "max-iterations", parameters );
@@ -319,10 +316,8 @@ bool
 TimeDependentPDESolver< Problem, TimeStepper >::
-   TNL_ASSERT( timeStepper != 0,
-              std::cerr << "No time stepper was set in PDESolver." );
-   TNL_ASSERT( problem != 0,
-              std::cerr << "No problem was set in PDESolver." );
+   TNL_ASSERT_TRUE( timeStepper, "No time stepper was set in PDESolver." );
+   TNL_ASSERT_TRUE( problem, "No problem was set in PDESolver." );
    if( snapshotPeriod == 0 )
diff --git a/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h b/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h
index 0f27de6489ffc3ee7a4ec37d6ca1d69c766f913a..957f710300bbc373b184dbd5736028377b68a7ba 100644
--- a/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h
+++ b/src/TNL/Solvers/PDE/TimeIndependentPDESolver_impl.h
@@ -58,15 +58,8 @@ setup( const Config::ParameterContainer& parameters,
     * Set DOFs (degrees of freedom)
-   TNL_ASSERT( problem->getDofs( this->mesh ) != 0, );
-   cout << "Allocating dofs ... ";
-   if( ! this->dofs.setSize( problem->getDofs( this->mesh ) ) )
-   {
-      cerr << endl;
-      cerr << "I am not able to allocate DOFs (degrees of freedom)." << endl;
-      return false;
-   }
-   cout << " [ OK ]" << endl;
+   TNL_ASSERT_GT( problem->getDofs( this->mesh ), 0, "number of DOFs must be positive" );
+   this->dofs.setSize( problem->getDofs( this->mesh ) );
    this->dofs.setValue( 0.0 );
    this->problem->bindDofs( this->mesh, this->dofs );   
@@ -85,7 +78,6 @@ setup( const Config::ParameterContainer& parameters,
       return false;
    cout << " [ OK ]" << endl;
    return true;
@@ -149,8 +141,7 @@ bool
 tnlTimeIndependentPDESolver< Problem >::
-   TNL_ASSERT( problem != 0,
-              cerr << "No problem was set in tnlPDESolver." );
+   TNL_ASSERT_TRUE( problem, "No problem was set in tnlPDESolver." );
diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h
index a191add046b7f1b4e2b317a26e4abd23ee96606d..e66397310414edbecc68d7a1090e2186b952f851 100644
--- a/src/TNL/Solvers/SolverConfig_impl.h
+++ b/src/TNL/Solvers/SolverConfig_impl.h
@@ -26,6 +26,10 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    typedef DummyProblem< double, Devices::Host, int > DummyProblemType;
    config.addDelimiter( " === General parameters ==== " );
+   config.addEntry< bool >( "catch-exceptions",
+                            "Catch C++ exceptions. Disabling it allows the program to drop into the debugger "
+                            "and track the origin of the exception.",
+                            true );
     * Setup real type
@@ -51,6 +55,12 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    if( ConfigTagDevice< ConfigTag, Devices::Cuda >::enabled )
       config.addEntryEnum( "cuda" );
+#ifdef HAVE_MIC
+   if( ConfigTagDevice< ConfigTag, Devices::MIC >::enabled )
+      config.addEntryEnum( "mic" );
     * Setup index type.
@@ -107,6 +117,8 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
          config.addEntryEnum( "cg" );
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabSolverTag >::enabled )
          config.addEntryEnum( "bicgstab" );
+      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabLSolverTag >::enabled )
+         config.addEntryEnum( "bicgstabl" );
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitCWYGMRESSolverTag >::enabled )
          config.addEntryEnum( "cwygmres" );
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitGMRESSolverTag >::enabled )
@@ -123,6 +135,10 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
    config.addEntry< String >( "preconditioner", "The preconditioner for the discrete solver:", "none" );
    config.addEntryEnum( "none" );
    config.addEntryEnum( "diagonal" );
+// TODO: implement parallel ILU or device-dependent build config tags for preconditioners
+#ifndef HAVE_CUDA
+   config.addEntryEnum( "ilu0" );
    if( ConfigTagTimeDiscretisation< ConfigTag, ExplicitTimeDiscretisationTag >::enabled ||
        ConfigTagTimeDiscretisation< ConfigTag, SemiImplicitTimeDiscretisationTag >::enabled )
@@ -147,10 +163,15 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
          Linear::CG< MatrixType >::configSetup( config );
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabSolverTag >::enabled )
          Linear::BICGStab< MatrixType >::configSetup( config );
+      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitBICGStabLSolverTag >::enabled )
+         Linear::BICGStabL< MatrixType >::configSetup( config );
+      // GMRES and CWYGMRES have the same options
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitCWYGMRESSolverTag >::enabled )
          Linear::CWYGMRES< MatrixType >::configSetup( config );
-      if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitGMRESSolverTag >::enabled )
+      else if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitGMRESSolverTag >::enabled )
          Linear::GMRES< MatrixType >::configSetup( config );
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitTFQMRSolverTag >::enabled )
          Linear::TFQMR< MatrixType >::configSetup( config );
       if( ConfigTagSemiImplicitSolver< ConfigTag, SemiImplicitSORSolverTag >::enabled )
@@ -167,4 +188,3 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri
 } // namespace Solvers
 } // namespace TNL
diff --git a/src/TNL/Solvers/SolverInitiator_impl.h b/src/TNL/Solvers/SolverInitiator_impl.h
index c826563a3e2717c7119ad6e464b5521e228d04a2..1b5ddc1a3bd15ccd16cb4bcfa7bc6940f7342134 100644
--- a/src/TNL/Solvers/SolverInitiator_impl.h
+++ b/src/TNL/Solvers/SolverInitiator_impl.h
@@ -17,6 +17,7 @@
 #include <TNL/Solvers/Linear/GMRES.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Devices/MIC.h>
 namespace TNL {
 namespace Solvers {   
@@ -75,6 +76,8 @@ class SolverInitiatorRealResolver< ProblemSetter, Real, ConfigTag, true >
             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Host, ConfigTag >::run( parameters );
          if( device == "cuda" )
             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Cuda, ConfigTag >::run( parameters );
+         if(device == "mic")
+             return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::MIC, ConfigTag >::run( parameters );
          std::cerr << "The device '" << device << "' is not defined. " << std::endl;
          return false;
diff --git a/src/TNL/Solvers/SolverMonitor.h b/src/TNL/Solvers/SolverMonitor.h
index 46911415f469aa16f52ac683607e598e2736cd2e..73d78ea4cd458f4e6981d56bf5357d5df6eac3da 100644
--- a/src/TNL/Solvers/SolverMonitor.h
+++ b/src/TNL/Solvers/SolverMonitor.h
@@ -20,16 +20,14 @@ namespace Solvers {
 class SolverMonitor
-   public:
-      : timeout_milliseconds(500),
-        stopped(true),
-        timer(nullptr)
-   {};
+      : timeout_milliseconds( 500 ),
+        started( false ),
+        stopped( false ),
+        timer( nullptr )
+   {}
-   ~SolverMonitor() {};
    virtual void refresh( bool force = false ) = 0;
    void setRefreshRate( const int& refreshRate )
@@ -44,7 +42,11 @@ class SolverMonitor
    void runMainLoop()
-      stopped = false;
+      // We need to use both 'started' and 'stopped' to avoid a deadlock
+      // when the loop thread runs this method delayed after the
+      // SolverMonitorThread's destructor has already called stopMainLoop()
+      // from the main thread.
+      started = true;
       const int timeout_base = 100;
       const std::chrono::milliseconds timeout( timeout_base );
@@ -62,6 +64,10 @@ class SolverMonitor
             std::this_thread::sleep_for( timeout );
+      // reset to initial state
+      started = false;
+      stopped = false;
    void stopMainLoop()
@@ -69,8 +75,12 @@ class SolverMonitor
       stopped = true;
-   protected:
+   bool isStopped() const
+   {
+      return stopped;
+   }
    double getElapsedTime()
       if( ! timer )
@@ -80,6 +90,7 @@ class SolverMonitor
    std::atomic_int timeout_milliseconds;
+   std::atomic_bool started;
    std::atomic_bool stopped;
    Timer* timer;
@@ -110,5 +121,4 @@ class SolverMonitorThread
 } // namespace Solvers
-} // namespace TNL
+} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h
index 516dab97d42bf11b0aa4185bc1acf01db2ab1542..a8fae47d95980bf6f1414c6adeb38c1413a82fa0 100644
--- a/src/TNL/Solvers/SolverStarter_impl.h
+++ b/src/TNL/Solvers/SolverStarter_impl.h
@@ -14,17 +14,21 @@
 #include <TNL/Logger.h>
 #include <TNL/String.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Solvers/SolverStarter.h>
+#include <TNL/Solvers/BuildConfigTags.h>
 #include <TNL/Solvers/ODE/Merson.h>
 #include <TNL/Solvers/ODE/Euler.h>
 #include <TNL/Solvers/Linear/SOR.h>
 #include <TNL/Solvers/Linear/CG.h>
 #include <TNL/Solvers/Linear/BICGStab.h>
+#include <TNL/Solvers/Linear/BICGStabL.h>
 #include <TNL/Solvers/Linear/GMRES.h>
 #include <TNL/Solvers/Linear/CWYGMRES.h>
 #include <TNL/Solvers/Linear/TFQMR.h>
 #include <TNL/Solvers/Linear/UmfpackWrapper.h>
 #include <TNL/Solvers/Linear/Preconditioners/Dummy.h>
 #include <TNL/Solvers/Linear/Preconditioners/Diagonal.h>
+#include <TNL/Solvers/Linear/Preconditioners/ILU0.h>
 #include <TNL/Solvers/PDE/ExplicitTimeStepper.h>
 #include <TNL/Solvers/PDE/SemiImplicitTimeStepper.h>
 #include <TNL/Solvers/PDE/TimeDependentPDESolver.h>
@@ -179,23 +183,25 @@ class SolverStarterTimeDiscretisationSetter< Problem, SemiImplicitTimeDiscretisa
          if( discreteSolver != "sor" &&
              discreteSolver != "cg" &&
              discreteSolver != "bicgstab" &&
+             discreteSolver != "bicgstabl" &&
              discreteSolver != "gmres" &&
              discreteSolver != "cwygmres" &&
              discreteSolver != "tfqmr" )
-            std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, gmres, cwygmres or tfqmr." << std::endl;
+            std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, bicgstabl, gmres, cwygmres or tfqmr." << std::endl;
             return false;
          if( discreteSolver != "sor" &&
              discreteSolver != "cg" &&
              discreteSolver != "bicgstab" &&
+             discreteSolver != "bicgstabl" &&
              discreteSolver != "gmres" &&
              discreteSolver != "cwygmres" &&
              discreteSolver != "tfqmr" &&
              discreteSolver != "umfpack" )
-            std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, gmres, cwygmres, tfqmr or umfpack." << std::endl;
+            std::cerr << "Unknown semi-implicit discrete solver " << discreteSolver << ". It can be only: sor, cg, bicgstab, bicgstabl, gmres, cwygmres, tfqmr or umfpack." << std::endl;
             return false;
@@ -206,6 +212,8 @@ class SolverStarterTimeDiscretisationSetter< Problem, SemiImplicitTimeDiscretisa
             return SolverStarterPreconditionerSetter< Problem, SemiImplicitCGSolverTag, ConfigTag >::run( problem, parameters );
          if( discreteSolver == "bicgstab" )
             return SolverStarterPreconditionerSetter< Problem, SemiImplicitBICGStabSolverTag, ConfigTag >::run( problem, parameters );
+         if( discreteSolver == "bicgstabl" )
+            return SolverStarterPreconditionerSetter< Problem, SemiImplicitBICGStabLSolverTag, ConfigTag >::run( problem, parameters );
          if( discreteSolver == "gmres" )
             return SolverStarterPreconditionerSetter< Problem, SemiImplicitGMRESSolverTag, ConfigTag >::run( problem, parameters );
          if( discreteSolver == "cwygmres" )
@@ -228,7 +236,7 @@ class SolverStarterTimeDiscretisationSetter< Problem, ImplicitTimeDiscretisation
       static bool run( Problem& problem,
                        const Config::ParameterContainer& parameters )
-         const String& discreteSolver = parameters. getParameter< String>( "discrete-solver" );
+//         const String& discreteSolver = parameters. getParameter< String>( "discrete-solver" );
          return false;
@@ -286,8 +294,10 @@ class SolverStarterPreconditionerSetter
             return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Dummy, ConfigTag >::run( problem, parameters );
          if( preconditioner == "diagonal" )
             return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Diagonal, ConfigTag >::run( problem, parameters );
+         if( preconditioner == "ilu0" )
+            return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::ILU0, ConfigTag >::run( problem, parameters );
-         std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal." << std::endl;
+         std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal, ilu0." << std::endl;
          return false;
@@ -356,42 +366,58 @@ bool SolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
    timeStepper.setSolver( discreteSolver );
-    * Set-up the PDE solver
+    * Open the log file
-   PDE::TimeDependentPDESolver< Problem, TimeStepper > solver;
-   solver.setProblem( problem );
-   solver.setTimeStepper( timeStepper );
-   if( ! solver.setup( parameters ) )
+   const String logFileName = parameters.getParameter< String >( "log-file" );
+   std::ofstream logFile( logFileName.getString() );
+   if( ! logFile ) {
+      std::cerr << "Unable to open the log file " << logFileName << "." << std::endl;
       return false;
+   }
-    * Write a prolog
+    * Set-up the PDE solver
-   int verbose = parameters.getParameter< int >( "verbose" );
-   parameters. getParameter< int >( "log-width", logWidth );
-   if( verbose )
-   {
-      Logger logger( logWidth,std::cout );
-      solver.writeProlog( logger, parameters );
-   }
-   String logFileName;
-   bool haveLogFile = parameters.getParameter< String >( "log-file", logFileName );
-   if( haveLogFile )
-   {
-      std::fstream logFile;
-      logFile.open( logFileName.getString(), std::ios::out );
-      if( ! logFile )
-      {
-         std::cerr << "Unable to open the log file " << logFileName << "." << std::endl;
+   PDE::TimeDependentPDESolver< Problem, TimeStepper > solver;
+   // catching exceptions ala gtest:
+   // https://github.com/google/googletest/blob/59c795ce08be0c8b225bc894f8da6c7954ea5c14/googletest/src/gtest.cc#L2409-L2431
+   const int catch_exceptions = parameters.getParameter< bool >( "catch-exceptions" );
+   if( catch_exceptions ) {
+      try {
+         solver.setProblem( problem );
+         solver.setTimeStepper( timeStepper );
+         if( ! solver.setup( parameters ) )
+            return false;
+      }
+      catch ( const std::exception& e ) {
+         std::cerr << "Setting up the solver failed due to a C++ exception with description: " << e.what() << std::endl;
+         logFile   << "Setting up The solver failed due to a C++ exception with description: " << e.what() << std::endl;
          return false;
-      else
-      {
-         Logger logger( logWidth, logFile );
-         solver.writeProlog( logger, parameters  );
-         logFile.close();
+      catch (...) {
+         std::cerr << "Setting up the solver failed due to an unknown C++ exception." << std::endl;
+         logFile   << "Setting up The solver failed due to an unknown C++ exception." << std::endl;
+         throw;
+   else {
+      solver.setProblem( problem );
+      solver.setTimeStepper( timeStepper );
+      if( ! solver.setup( parameters ) )
+         return false;
+   }
+   /****
+    * Write a prolog
+    */
+   const int verbose = parameters.getParameter< int >( "verbose" );
+   parameters.getParameter< int >( "log-width", logWidth );
+   if( verbose ) {
+      Logger logger( logWidth, std::cout );
+      solver.writeProlog( logger, parameters );
+   }
+   Logger logger( logWidth, logFile );
+   solver.writeProlog( logger, parameters  );
     * Set-up solver monitor and launch the main loop.
@@ -418,25 +444,34 @@ bool SolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
     * Start the solver
-   bool returnCode = solver.solve();
-   solverMonitorPointer->stopMainLoop();
-   if( ! returnCode )
-   {
-      if( verbose )
-         std::cerr << std::endl << "The solver did not converge. " << std::endl;
-      std::fstream logFile;
-      logFile.open( logFileName.getString(), std::ios::out | std::ios::app );
-      if( ! logFile )
-      {
-         std::cerr << "Unable to open the log file " << logFileName << "." << std::endl;
+   bool returnCode = true;
+   // catching exceptions ala gtest:
+   // https://github.com/google/googletest/blob/59c795ce08be0c8b225bc894f8da6c7954ea5c14/googletest/src/gtest.cc#L2409-L2431
+   if( catch_exceptions ) {
+      try {
+         returnCode = solver.solve();
+      }
+      catch ( const std::exception& e ) {
+         std::cerr << "The solver failed due to a C++ exception with description: " << e.what() << std::endl;
+         logFile   << "The solver failed due to a C++ exception with description: " << e.what() << std::endl;
          return false;
-      else
-      {
-         logFile << "The solver did not converge. " << std::endl;
-         logFile.close();
+      catch (...) {
+         std::cerr << "The solver failed due to an unknown C++ exception." << std::endl;
+         logFile   << "The solver failed due to an unknown C++ exception." << std::endl;
+         throw;
+   else {
+      returnCode = solver.solve();
+   }
+   solverMonitorPointer->stopMainLoop();
+   if( ! returnCode ) {
+      if( verbose )
+         std::cerr << std::endl << "The solver did not converge. " << std::endl;
+      logFile << "The solver did not converge. " << std::endl;
+   }
     * Stop timers
@@ -448,22 +483,10 @@ bool SolverStarter< ConfigTag > :: runPDESolver( Problem& problem,
     * Write an epilog
    if( verbose )
-      writeEpilog(std::cout, solver );
-   if( haveLogFile )
-   {
-      std::fstream logFile;
-      logFile.open( logFileName.getString(), std::ios::out | std::ios::app );
-      if( ! logFile )
-      {
-         std::cerr << "Unable to open the log file " << logFileName << "." << std::endl;
-         return false;
-      }
-      else
-      {
-         writeEpilog( logFile, solver );
-         logFile.close();
-      }
-   }
+      writeEpilog( std::cout, solver );
+   writeEpilog( logFile, solver );
+   logFile.close();
    return returnCode;
diff --git a/src/TNL/StaticFor.h b/src/TNL/StaticFor.h
index 322c509852749fca2b063e54707ae776c7096b8a..bc63c43848b07251bf92b5db12b2a35f351f9f78 100644
--- a/src/TNL/StaticFor.h
+++ b/src/TNL/StaticFor.h
@@ -10,6 +10,8 @@
 #pragma once
 namespace TNL {
 template< typename IndexType, IndexType val >
@@ -123,7 +125,7 @@ class StaticFor
    static void exec()
-#ifndef HAVE_ICPC
       StaticForExecutor< IndexType,
                          StaticForIndexTag< IndexType, begin >,
                          StaticForIndexTag< IndexType, end - begin >,
@@ -137,7 +139,7 @@ class StaticFor
    static void exec( T &p )
-#ifndef HAVE_ICPC
       StaticForExecutor< IndexType,
                          StaticForIndexTag< IndexType, begin >,
                          StaticForIndexTag< IndexType, end - begin >,
@@ -152,7 +154,7 @@ class StaticFor
    static void exec( T0& p0, T1& p1 )
-#ifndef HAVE_ICPC
       StaticForExecutor< IndexType,
                          StaticForIndexTag< IndexType, begin >,
                          StaticForIndexTag< IndexType, end - begin >,
@@ -168,7 +170,7 @@ class StaticFor
    static void exec( T0& p0, T1& p1, T2& p2 )
-#ifndef HAVE_ICPC
       StaticForExecutor< IndexType,
                          StaticForIndexTag< IndexType, begin >,
                          StaticForIndexTag< IndexType, end - begin >,
@@ -185,7 +187,7 @@ class StaticFor
    static void exec( T0& p0, T1& p1, T2& p2, T3& p3 )
-#ifndef HAVE_ICPC
       StaticForExecutor< IndexType,
                          StaticForIndexTag< IndexType, begin >,
                          StaticForIndexTag< IndexType, end - begin >,
diff --git a/src/TNL/String.cpp b/src/TNL/String.cpp
index a3740961d26523d338679dc574b1163895960b80..af8b46da8af1a142aca151027f8149cc266c426a 100644
--- a/src/TNL/String.cpp
+++ b/src/TNL/String.cpp
@@ -10,7 +10,6 @@
 #include <cstring>
 #include <string.h>
-#include <assert.h>
 #include <TNL/String.h>
 #include <TNL/Assert.h>
 #include <TNL/Containers/List.h>
@@ -24,123 +23,123 @@ namespace TNL {
 const unsigned int STRING_PAGE = 256;
-String :: String()
+   : string( nullptr ), length( 0 )
-   string = new char[ STRING_PAGE ];
-   string[ 0 ] = 0;
-   length = STRING_PAGE;
+   setString( nullptr );
-String :: String( const char* c, int prefix_cut_off, int sufix_cut_off )
-   : string( 0 ), length( 0 )
+String::String( const char* c, int prefix_cut_off, int sufix_cut_off )
+   : string( nullptr ), length( 0 )
    setString( c, prefix_cut_off, sufix_cut_off );
-String :: String( const String& str )
-: string( 0 ), length( 0 )
+String::String( const String& str )
+   : string( nullptr ), length( 0 )
-   setString( str. getString() );
+   setString( str.getString() );
-String :: String( unsigned number )
-: string( 0 ), length( 0 )
+String String::getType()
-   this->setString( convertToString( number ).getString() );
-String :: String( int number )
-: string( 0 ), length( 0 )
-   this->setString( convertToString( number ).getString() );
-String :: String( unsigned long int number )
-: string( 0 ), length( 0 )
-   this->setString( convertToString( number ).getString() );
+   return String( "String" );
-String :: String( long int number )
-: string( 0 ), length( 0 )
-   this->setString( convertToString( number ).getString() );
+   if( string ) delete[] string;
-String :: String( float number )
-: string( 0 ), length( 0 )
+int String::getLength() const
-   this->setString( convertToString( number ).getString() );
+   return getSize();
-String :: String( double number )
-: string( 0 ), length( 0 )
+int String::getSize() const
-   this->setString( convertToString( number ).getString() );
+   return strlen( string );
-String String :: getType()
+int String::getAllocatedSize() const
-   return String( "String" );
+   return length;
-String :: ~String()
+void String::setSize( int size )
-   if( string ) delete[] string;
+   TNL_ASSERT_GE( size, 0, "string size must be non-negative" );
+   const int _length = STRING_PAGE * ( size / STRING_PAGE + 1 );
+   TNL_ASSERT_GE( _length, 0, "_length size must be non-negative" );
+   if( length != _length ) {
+      if( string ) {
+         delete[] string;
+         string = nullptr;
+      }
+      string = new char[ _length ];
+      length = _length;
+   }
-void String :: setString( const char* c, int prefix_cut_off, int sufix_cut_off )
+void String::setString( const char* c, int prefix_cut_off, int sufix_cut_off )
-   if( ! c )
-   {
+   if( ! c ) {
       if( ! string )
-      {
-         string = new char[ STRING_PAGE ];
-         length = STRING_PAGE;
-      }
+         setSize( 1 );
       string[ 0 ] = 0;
-   int c_len = ( int ) strlen( c );
-   int _length = max( 0, c_len - prefix_cut_off - sufix_cut_off );
+   const int c_len = ( int ) strlen( c );
+   const int _length = max( 0, c_len - prefix_cut_off - sufix_cut_off );
    if( length < _length || length == 0 )
-   {
-      if( string ) delete[] string;
-      length = STRING_PAGE * ( _length / STRING_PAGE + 1 );
-      string = new char[ length ];
-   }
+      setSize( _length );
    TNL_ASSERT( string, );
-   memcpy( string, c + min( c_len, prefix_cut_off ), sizeof( char ) * ( _length ) );
+   memcpy( string, c + min( c_len, prefix_cut_off ), _length * sizeof( char ) );
    string[ _length ] = 0;
-const char& String :: operator[]( int i ) const
+const char* String::getString() const
+   return string;
+char* String::getString()
+   return string;
+const char& String::operator[]( int i ) const
    TNL_ASSERT( i >= 0 && i < length,
-              std::cerr << "Accessing char outside the string." );
+               std::cerr << "Accessing char outside the string." );
    return string[ i ];
-char& String :: operator[]( int i )
+char& String::operator[]( int i )
    TNL_ASSERT( i >= 0 && i < length,
-              std::cerr << "Accessing char outside the string." );
+               std::cerr << "Accessing char outside the string." );
    return string[ i ];
-String& String :: operator = ( const String& str )
+ * Operators for C strings
+ */
+String& String::operator=( const char* str )
-   setString( str. getString() );
-   return * this;
+   setString( str );
+   return *this;
-String& String :: operator += ( const char* str )
+String& String::operator+=( const char* str )
    if( str )
-      int len1 = strlen( string );
-      int len2 = strlen( str );
+      const int len1 = strlen( string );
+      const int len2 = strlen( str );
       if( len1 + len2 < length )
          memcpy( string + len1, str, sizeof( char ) * ( len2 + 1 ) );
@@ -152,12 +151,70 @@ String& String :: operator += ( const char* str )
          memcpy( string + len1, str, sizeof( char ) * ( len2 + 1 ) );
-   return * this;
+   return *this;
+String String::operator+( const char* str ) const
+   return String( *this ) += str;
+bool String::operator==( const char* str ) const
+   TNL_ASSERT( string && str, );
+   return strcmp( string, str ) == 0;
+bool String::operator!=( const char* str ) const
+   return ! operator==( str );
+ * Operators for Strings
+ */
+String& String::operator=( const String& str )
+   setString( str.getString() );
+   return *this;
+String& String::operator+=( const String& str )
+   return operator+=( str.getString() );
+String String::operator+( const String& str ) const
+   return String( *this ) += str;
+bool String::operator==( const String& str ) const
+   TNL_ASSERT( string && str.string, );
+   return strcmp( string, str.string ) == 0;
-String& String :: operator += ( const char str )
+bool String::operator!=( const String& str ) const
-   int len1 = strlen( string );
+   return ! operator==( str );
+ * Operators for single characters
+ */
+String& String::operator=( char str )
+   string[ 0 ] = str;
+   string[ 1 ] = 0;
+   return *this;
+String& String::operator+=( const char str )
+   const int len1 = strlen( string );
    if( len1 + 1 < length )
       string[ len1 ] = str;
@@ -173,106 +230,92 @@ String& String :: operator += ( const char str )
       string[ len1 + 1 ] = 0;
-   return * this;
-String& String :: operator += ( const String& str )
-   return operator += ( str. getString() );
+   return *this;
-String String :: operator + ( const String& str ) const
+String String::operator+( char str ) const
    return String( *this ) += str;
-String String :: operator + ( const char* str ) const
+bool String::operator==( char str ) const
-   return String( *this ) += str;
+   return *this == String( str );
-bool String :: operator == ( const String& str ) const
+bool String::operator!=( char str ) const
-   assert( string && str. string );
-   if( str. length != length )
-      return false;
-   if( strcmp( string, str. string ) == 0 )
-      return true;
-   return false;
+   return ! operator==( str );
-bool String :: operator != ( const String& str ) const
-   return ! operator == ( str );
-bool String :: operator == ( const char* str ) const
-   //cout << ( void* ) string << " " << ( void* ) str << std::endl;
-   assert( string && str );
-   if( strcmp( string, str ) == 0 ) return true;
-   return false;
-String :: operator bool () const
+String::operator bool () const
    if( string[ 0 ] ) return true;
    return false;
-bool String :: operator != ( const char* str ) const
+bool String::operator!() const
-   return ! operator == ( str );
+   return ! operator bool();
-int String :: getLength() const
+String String::replace( const String& pattern,
+                        const String& replaceWith,
+                        int count ) const
-   return strlen( string );
-replace( const String& pattern,
-         const String& replaceWith )
-   int occurences( 0 );
-   int patternLength = pattern.getLength();
    const int length = this->getLength();
-   int patternPointer( 0 );
+   const int patternLength = pattern.getLength();
+   const int replaceWithLength = replaceWith.getLength();
+   int patternPointer = 0;
+   int occurrences = 0;
    for( int i = 0; i < length; i++ )
       if( this->string[ i ] == pattern[ patternPointer ] )
       if( patternPointer == patternLength )
-         occurences++;
+         occurrences++;
          patternPointer = 0;
-   const int replaceWithLength = replaceWith.getLength();
-   int newStringLength = length + occurences * ( replaceWithLength - patternLength );
-   char* newString = new char[ newStringLength ];
-   int newStringPointer( 0 );
-   int lastPatternStart( 0 );
-   for( int i = 0; i < length; i++ )
-   {
+   if( count > 0 && occurrences > count )
+      occurrences = count;
+   String newString;
+   const int newStringLength = length + occurrences * ( replaceWithLength - patternLength );
+   newString.setSize( newStringLength );
+   int newStringHead = 0;
+   patternPointer = 0;
+   occurrences = 0;
+   for( int i = 0; i < length; i++ ) {
+      // copy current character
+      newString[ newStringHead++ ] = this->string[ i ];
+      // check if pattern matches
       if( this->string[ i ] == pattern[ patternPointer ] )
-      {
-         if( patternPointer == 0 )
-            lastPatternStart = newStringPointer;
-      }
-      newString[ newStringPointer++ ] = this->string[ i ];
-      if( patternPointer == patternLength )
-      {
-         newStringPointer = lastPatternStart;
-         for( int j = 0; j < replaceWithLength; j++ )
-            newString[ newStringPointer++ ] = replaceWith[ j ];
+      else
+         patternPointer = 0;
+      // handle full match
+      if( patternPointer == patternLength ) {
+         // skip unwanted replacements
+         if( count == 0 || occurrences < count ) {
+            newStringHead -= patternLength;
+            for( int j = 0; j < replaceWithLength; j++ )
+               newString[ newStringHead++ ] = replaceWith[ j ];
+         }
+         occurrences++;
          patternPointer = 0;
-   delete[] this->string;
-   this->string = newString;
+   newString[ newStringHead ] = 0;
+   return newString;
@@ -292,115 +335,48 @@ String::strip( char strip ) const
    return "";
-const char* String :: getString() const
+int String::split( Containers::List< String >& list, const char separator ) const
-   return string;
-char* String :: getString()
-   return string;
-bool String :: save( std::ostream& file ) const
-   assert( string );
-   int len = strlen( string );
-   file. write( ( char* ) &len, sizeof( int ) );
-   file. write( string, len );
-   if( file. bad() ) return false;
-   return true;
-bool String :: load( std::istream& file )
-   int _length;
-   file. read( ( char* ) &_length, sizeof( int ) );
-   if( file. bad() ) return false;
-   if( ! _length )
-   {
-      string[ 0 ] = 0;
-      length = 0;
-      return true;
-   }
-   if( string && length < _length )
-   {
-      delete[] string;
-      string = NULL;
-   }
-   if( ! string )
+   list.reset();
+   String copy( *this );
+   int len = copy.getLength();
+   for( int i = 0; i < len; i ++ )
+      if( copy[ i ] == separator )
+         copy[ i ] = 0;
+   for( int i = 0; i < len; i ++ )
-      //dbgCout( "Reallocating string..." );
-      length = STRING_PAGE * ( _length / STRING_PAGE + 1 );
-      string = new char[ length ];
+      if( copy[ i ] == 0 ) continue;
+      String new_string;
+      new_string.setString( &copy.getString()[ i ] );
+      i += new_string.getLength();
+      list.Append( new_string );
-   file. read( string, _length );
-   if( file. bad() ) return false;
-   string[ _length ] = 0;
-   return true;
+   return list.getSize();
-bool String :: save( File& file ) const
+bool String::save( File& file ) const
    TNL_ASSERT( string,
               std::cerr << "string = " << string );
    int len = strlen( string );
-#ifdef HAVE_NOT_CXX11
-   if( ! file. write< int, Devices::Host >( &len ) )
-   if( ! file. write( &len ) )
+   if( ! file.write( &len ) )
       return false;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. write< char, Devices::Host, int >( string, len ) )
-   if( ! file. write( string, len ) )
+   if( ! file.write( string, len ) )
       return false;
    return true;
-bool String :: load( File& file )
+bool String::load( File& file )
    int _length;
-#ifdef HAVE_NOT_CXX11
-   if( ! file. read< int, Devices::Host >( &_length ) )
-   if( ! file. read( &_length ) )
-   {
+   if( ! file.read( &_length ) ) {
       std::cerr << "I was not able to read String length." << std::endl;
       return false;
-   if( ! _length )
-   {
-      string[ 0 ] = 0;
-      length = 0;
-      return true;
-   }
-   if( string && length < _length )
-   {
-      delete[] string;
-      string = NULL;
-   }
-   if( ! string )
-   {
-      //dbgCout( "Reallocating string..." );
-      length = STRING_PAGE * ( _length / STRING_PAGE + 1 );
-      string = new char[ length ];
-   }
-#ifdef HAVE_NOT_CXX11
-   if( ! file. read< char, Devices::Host, int >( string, _length ) )
-   if( ! file. read( string, _length ) )
-   {
+   setSize( _length );
+   if( _length && ! file.read( string, _length ) ) {
       std::cerr << "I was not able to read a String with a length " << length << "." << std::endl;
       return false;
@@ -414,7 +390,7 @@ bool String :: load( File& file )
    dbgFunctionName( "mString", "MPIBcast" );
    int iproc;
    MPI_Comm_rank( MPI_COMM_WORLD, &iproc );
-   assert( string );
+   TNL_ASSERT( string, );
    int len = strlen( string );
    MPI_Bcast( &len, 1, MPI_INT, root, comm );
    dbgExpr( iproc );
@@ -437,40 +413,26 @@ bool String :: load( File& file )
 bool String :: getLine( std::istream& stream )
-   std :: string str;
+   std::string str;
    getline( stream, str );
-   this->setString( str. data() );
+   this->setString( str.c_str() );
    if( ! ( *this ) ) return false;
    return true;
-int String :: parse( Containers::List< String >& list, const char separator ) const
+String operator+( char string1, const String& string2 )
-   list.reset();
-   String copy( *this );
-   int len = copy. getLength();
-   for( int i = 0; i < len; i ++ )
-      if( copy[ i ] == separator )
-         copy[ i ] = 0;
-   for( int i = 0; i < len; i ++ )
-   {
-      if( copy[ i ] == 0 ) continue;
-      String new_string;
-      new_string. setString( &copy. getString()[ i ] );
-      i += new_string. getLength();
-      list. Append( new_string );
-   }
-   return list. getSize();
+   return String( string1 ) + string2;
-String operator + ( const char* string1, const String& string2 )
+String operator+( const char* string1, const String& string2 )
    return String( string1 ) + string2;
-std::ostream& operator << ( std::ostream& stream, const String& str )
+std::ostream& operator<<( std::ostream& stream, const String& str )
-   stream << str. getString();
+   stream << str.getString();
    return stream;
diff --git a/src/TNL/String.h b/src/TNL/String.h
index 88a52704e576e5d1032f13633ebf79c4267fb845..66c8068968f0da79ddb2ecedd418b4445dbc500b 100644
--- a/src/TNL/String.h
+++ b/src/TNL/String.h
@@ -10,7 +10,6 @@
 #pragma once
-#include <stdio.h>
 #include <iostream>
 #include <sstream>
 #include <TNL/mpi-supp.h>
@@ -23,6 +22,9 @@ namespace Containers {
    template< class T > class List;
+template< typename T >
+String convertToString( const T& value );
 //! Class for managing strings
 class String
@@ -32,8 +34,7 @@ class String
    //! Length of the allocated piece of memory
    int length;
-   public:
    //! Basic constructor
@@ -42,37 +43,42 @@ class String
        @param sufix_cut_off says the same about sufix.
    String( const char* c,
-              int prefix_cut_off = 0,
-              int sufix_cut_off = 0 );
+           int prefix_cut_off = 0,
+           int sufix_cut_off = 0 );
    static String getType();
    //! Copy constructor
    String( const String& str );
-   //! Convert number to a string
-   String( unsigned number );
-   String( int number );
-   String( unsigned long int number );
+   //! Convert anything to a string
+   template< typename T >
+   String( T value )
+      : string( nullptr ), length( 0 )
+   {
+      setString( convertToString( value ).getString() );
+   }
-   String( long int number );
+   //! Destructor
+   ~String();
-   String( float number );
+   //! Return length of the string
+   int getLength() const;
+   int getSize() const;
-   String( double number );
+   //! Return currently allocated size
+   int getAllocatedSize() const;
-   //! Destructor
-   ~String();
+   //! Reserve space for given number of characters
+   void setSize( int size );
    //! Set string from given char pointer
    /*! @param prefix_cut_off says length of the prefix that is going to be omitted and
        @param sufix_cut_off says the same about sufix.
    void setString( const char* c,
-                     int prefix_cut_off = 0,
-                     int sufix_cut_off = 0 );
+                   int prefix_cut_off = 0,
+                   int sufix_cut_off = 0 );
    //! Return pointer to data
    const char* getString() const;
@@ -87,58 +93,46 @@ class String
    char& operator[]( int i );
-    * TODO: the operators do not work properly
-    * for example String + const char*
+    * Operators for C strings
-   //! Operator =
-   String& operator = ( const String& str );
-   //! Operator +=
-   String& operator += ( const char* str );
-   //! Operator +=
-   String& operator += ( const char str );
-   //! Operator +=
-   String& operator += ( const String& str );
+   String& operator=( const char* str );
+   String& operator+=( const char* str );
+   String operator+( const char* str ) const;
+   bool operator==( const char* str ) const;
+   bool operator!=( const char* str ) const;
-   //! Operator +
-   String operator + ( const String& str ) const;
-   //! Operator +
-   String operator + ( const char* str ) const;
+   /****
+    * Operators for Strings
+    */
+   String& operator=( const String& str );
+   String& operator+=( const String& str );
+   String operator+( const String& str ) const;
+   bool operator==( const String& str ) const;
+   bool operator!=( const String& str ) const;
-   //! Comparison operator
-   bool operator == ( const String& str ) const;
+   /****
+    * Operators for single characters
+    */
+   String& operator=( char str );
+   String& operator+=( char str );
+   String operator+( char str ) const;
+   bool operator==( char str ) const;
+   bool operator!=( char str ) const;
-   //! Comparison operator
-   bool operator != ( const String& str ) const;
+   //! Cast to bool operator
+   operator bool() const;
-   //! Comparison operator
-   bool operator == ( const char* ) const;
+   //! Cast to bool with negation operator
+   bool operator!() const;
-   //! Comparison operator
-   bool operator != ( const char* ) const;
-   //! Retyping operator
-   operator bool () const;
-   //! Return length of the string
-   int getLength() const;
-   void replace( const String& pattern,
-                 const String& replaceWith );
+   String replace( const String& pattern,
+                   const String& replaceWith,
+                   int count = 0 ) const;
    String strip( char strip = ' ' ) const;
-   // TODO: remove
-   //! Write to a binary file
-   bool save( std::ostream& file ) const;
-   // TODO: remove
-   //! Read from binary file
-   bool load( std::istream& file );
+   //! Split the string into list of strings w.r.t. given separator.
+   int split( Containers::List< String >& list, const char separator = ' ' ) const;
    //! Write to a binary file
    bool save( File& file ) const;
@@ -152,15 +146,14 @@ class String
    //! Read one line from given stream.
    bool getLine( std::istream& stream );
-   //! Parse the string into list of strings w.r.t. given separator.
-   int parse( Containers::List< String >& list, const char separator = ' ' ) const;
-   friend std::ostream& operator << ( std::ostream& stream, const String& str );
+   friend std::ostream& operator<<( std::ostream& stream, const String& str );
-String operator + ( const char* string1, const String& string2 );
+String operator+( char string1, const String& string2 );
+String operator+( const char* string1, const String& string2 );
-std::ostream& operator << ( std::ostream& stream, const String& str );
+std::ostream& operator<<( std::ostream& stream, const String& str );
 template< typename T >
 String convertToString( const T& value )
diff --git a/src/TNL/UniquePointer.h b/src/TNL/UniquePointer.h
index f8e85e04756e218463d7d6f21c85792238f0cb79..606f86cc4326c57cd73f7efa525dbcc4b344306b 100644
--- a/src/TNL/UniquePointer.h
+++ b/src/TNL/UniquePointer.h
@@ -18,6 +18,8 @@
 #include <cstring>
+#include "Devices/MIC.h"
 namespace TNL { 
@@ -61,11 +63,18 @@ class UniquePointer< Object, Devices::Host > : public SmartPointer
          return *( this->pointer );
-      operator bool()
+      __cuda_callable__
+      operator bool() const
          return this->pointer;
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pointer;
+      }
       template< typename Device = Devices::Host >
       const Object& getData() const
@@ -148,11 +157,18 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          return this->pd->data;
-      operator bool()
+      __cuda_callable__
+      operator bool() const
          return this->pd;
+      __cuda_callable__
+      bool operator!() const
+      {
+         return ! this->pd;
+      }
       template< typename Device = Devices::Host >      
       const Object& getData() const
@@ -203,7 +219,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          if( this->modified() )
             cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice );
-            if( ! checkCudaDevice )
+            if( ! TNL_CHECK_CUDA_DEVICE )
                return false;
             return true;
@@ -239,12 +255,8 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
       bool allocate( Args... args )
          this->pd = new PointerData( args... );
-         if( ! this->pd )
-            return false;
          // pass to device
          this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data );
-         if( ! this->cuda_pointer )
-            return false;
          // set last-sync state
          Devices::Cuda::insertSmartPointer( this );
@@ -282,5 +294,194 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
       Object* cuda_pointer;
+#ifdef HAVE_MIC
+template< typename Object >
+class UniquePointer< Object, Devices::MIC > : public SmartPointer
+   public:
+      typedef Object ObjectType;
+      typedef Devices::MIC DeviceType;
+      typedef UniquePointer< Object, Devices::MIC > ThisType;
+      template< typename... Args >
+      explicit  UniquePointer( const Args... args )
+      : pd( nullptr ),
+        cuda_pointer( nullptr )
+      {
+         this->allocate( args... );
+      }
+      const Object* operator->() const
+      {
+         return &this->pd->data;
+      }
+      Object* operator->()
+      {
+         this->pd->maybe_modified = true;
+         return &this->pd->data;
+      }
+      const Object& operator *() const
+      {
+         return this->pd->data;
+      }
+      Object& operator *()
+      {
+         this->pd->maybe_modified = true;
+         return this->pd->data;
+      }
+      operator bool()
+      {
+         return this->pd;
+      }
+      template< typename Device = Devices::Host >      
+      const Object& getData() const
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT( this->pd, );
+         TNL_ASSERT( this->mic_pointer, );
+         if( std::is_same< Device, Devices::Host >::value )
+            return this->pd->data;
+         if( std::is_same< Device, Devices::MIC >::value )
+            return *( this->mic_pointer );            
+      }
+      template< typename Device = Devices::Host >
+      Object& modifyData()
+      {
+         static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." );
+         TNL_ASSERT( this->pd, );
+         TNL_ASSERT( this->mic_pointer, );
+         if( std::is_same< Device, Devices::Host >::value )
+         {
+            this->pd->maybe_modified = true;
+            return this->pd->data;
+         }
+         if( std::is_same< Device, Devices::MIC >::value )
+            return *( this->mic_pointer );
+      }
+      const ThisType& operator=( ThisType& ptr )
+      {
+         this->free();
+         this->pd = ptr.pd;
+         this->mic_pointer = ptr.mic_pointer;
+         ptr.pd = nullptr;
+         ptr.mic_pointer = nullptr;
+         return *this;
+      }
+      const ThisType& operator=( ThisType&& ptr )
+      {
+         return this->operator=( ptr );
+      }      
+      bool synchronize()
+      {
+         if( ! this->pd )
+            return true;
+         if( this->modified() )
+         { 
+            Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object));  
+            this->set_last_sync_state();
+            return true;
+         }
+         return true;//??
+      }
+      ~UniquePointer()
+      {
+         this->free();
+         Devices::MIC::removeSmartPointer( this );
+      }
+   protected:
+      struct PointerData
+      {
+         Object data;
+         char data_image[ sizeof(Object) ];
+         bool maybe_modified;
+         template< typename... Args >
+         explicit PointerData( Args... args )
+         : data( args... ),
+           maybe_modified( false )
+         {}
+      };
+      template< typename... Args >
+      bool allocate( Args... args )
+      {
+         this->pd = new PointerData( args... );
+         if( ! this->pd )
+            return false;
+         // pass to device
+         this->mic_pointer=(Object*)Devices::MIC::AllocMIC(sizeof(Object));
+         if( ! this->mic_pointer )
+            return false;
+         Devices::MIC::CopyToMIC((void*)mic_pointer,(void*)&this->pd->data,sizeof(Object));
+         // set last-sync state
+         this->set_last_sync_state();
+         Devices::MIC::insertSmartPointer( this );
+         return true;
+      }
+      void set_last_sync_state()
+      {
+         TNL_ASSERT( this->pd, );
+         std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) );
+         this->pd->maybe_modified = false;
+      }
+      bool modified()
+      {
+         TNL_ASSERT( this->pd, );
+         // optimization: skip bitwise comparison if we're sure that the data is the same
+         if( ! this->pd->maybe_modified )
+            return false;
+         return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) ) != 0;
+      }
+      void free()
+      {
+         if( this->pd )
+            delete this->pd;
+         if( this->mic_pointer )
+             Devices::MIC::FreeMIC(mic_pointer);
+      }
+      PointerData* pd;
+      // cuda_pointer can't be part of PointerData structure, since we would be
+      // unable to dereference this-pd on the device
+      Object* mic_pointer;
+#if  (!defined(NDEBUG)) && (!defined(HAVE_MIC)) 
+namespace Assert {
+template< typename Object, typename Device >
+struct Formatter< UniquePointer< Object, Device > >
+   static std::string
+   printToString( const UniquePointer< Object, Device >& value )
+   {
+      ::std::stringstream ss;
+      ss << "(UniquePointer< " << Object::getType() << ", " << Device::getDeviceType()
+         << " > object at " << &value << ")";
+      return ss.str();
+   }
+} // namespace Assert
 } // namespace TNL
diff --git a/src/TNL/legacy/CMakeLists.txt b/src/TNL/legacy/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h b/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h
index 33153a50ff0e192db596897a7b701ace99171f68..1bb3dfd96c4d807af244c5f3424990b9278f1824 100644
--- a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h
+++ b/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h
@@ -15,6 +15,7 @@
 #include <TNL/File.h>
 #include <TNL/Object.h>
 #include <TNL/Devices/Cuda.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 #include <TNL/Config/ConfigDescription.h>
 #include <TNL/Config/ParameterContainer.h>
 #include <TNL/Matrices/CSR.h>
@@ -78,11 +79,7 @@ bool benchmarkSolver( const Config::ParameterContainer&  parameters,
    solver. setSolverMonitor( solverMonitor );
    solver. setRefreshRate( 10 );
    solverMonitor. resetTimers();
-#ifdef HAVE_NOT_CXX11
-   solver. template solve< Vector, LinearResidueGetter< Matrix, Vector > >( b, x );
    solver. solve( b, x );
    bool solverConverged( solver. getResidue() < maxResidue );
    const String& logFileName = parameters. getParameter< String >( "log-file" );
@@ -313,8 +310,7 @@ bool benchmarkMatrix( const Config::ParameterContainer&  parameters )
          return false;
       x = cudaX;*/
-      CudaSupportMissingMessage;;
-      return false;
+      throw Exceptions::CudaSupportMissing();
diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h
index 846a0c8cc7590f9d332073faf3c0011a5fdb2840..35a5b388c9b317d3fbf4292337814b76d6d7122b 100644
--- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h
+++ b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h
@@ -12,7 +12,9 @@
 #include "tnlSpmvBenchmark.h"
 #include <TNL/Assert.h>
+#include <TNL/Exceptions/CudaSupportMissing.h>
 template< typename Real, typename Device, typename Index>
 class tnlSpmvBenchmarkAdaptiveRgCSR : public tnlSpmvBenchmark< Real, Device, Index, tnlAdaptiveRgCSR >
@@ -131,7 +133,7 @@ void tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: writeProgress() con
        std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << "  FAILED";
 #ifndef HAVE_CUDA
    if( Device :: getDevice() == Devices::CudaDevice )
-      CudaSupportMissingMessage;;
+      throw Exceptions::CudaSupportMissing();
      std::cout << std::endl;
diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h
index b0be71e993fde254731aef550d5916401eec073c..6327ac95d659ebc6592d458f27288a11cf34d141 100644
--- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h
+++ b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h
@@ -13,6 +13,8 @@
 #include "tnlSpmvBenchmark.h"
+#include <TNL/Exceptions/CudaSupportMissing.h>
 template< typename Real, typename Device, typename Index>
 class tnlSpmvBenchmarkRgCSR : public tnlSpmvBenchmark< Real, Device, Index, tnlRgCSR >
@@ -134,7 +136,7 @@ void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: writeProgress() const
        std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << "  FAILED - maxError is " << this->maxError << ". ";
 #ifndef HAVE_CUDA
    if( Device :: getDevice() == Devices::CudaDevice )
-      CudaSupportMissingMessage;;
+      throw Exceptions::CudaSupportMissing();
      std::cout << std::endl;
diff --git a/src/TNL/param-types.h b/src/TNL/param-types.h
index 790cf627abc0c58094e3113b664019130d53e825..8a13e09565dd3ede7c2414c9504b0e12ab142a5d 100644
--- a/src/TNL/param-types.h
+++ b/src/TNL/param-types.h
@@ -16,7 +16,7 @@
 namespace TNL {
 template< typename T >
-String getType() { return T :: getType(); };
+String getType() { return T::getType(); };
 template<> inline String getType< bool >() { return String( "bool" ); };
 template<> inline String getType< short int >() { return String( "short int" ); };
diff --git a/src/Tools/CMakeLists.txt b/src/Tools/CMakeLists.txt
old mode 100755
new mode 100644
index 3934359e9ba8448dc0fdf69bddac7a3101eb70ab..7069abf67277eda2f4d77835c868d166482715b3
--- a/src/Tools/CMakeLists.txt
+++ b/src/Tools/CMakeLists.txt
@@ -56,12 +56,7 @@ ADD_EXECUTABLE(tnl-curve2gnuplot${debugExt} ${tnlcurve2gnuplotsources})
 target_link_libraries (tnl-curve2gnuplot${debugExt} tnl${debugExt}-${tnlVersion} )
-    CUDA_ADD_EXECUTABLE( tnl-cuda-arch${debugExt} tnl-cuda-arch.cu
-                         OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} )
+   CUDA_ADD_EXECUTABLE( tnl-cuda-arch${debugExt} tnl-cuda-arch.cu )
    INSTALL( TARGETS tnl-cuda-arch${debugExt}
             RUNTIME DESTINATION bin
diff --git a/src/Tools/tnl-cuda-arch.cu b/src/Tools/tnl-cuda-arch.cu
index 7f1fb17496601adc5ffc84467a9c5db25feae244..7d880a19a32b7f41bf0fd45d7949f73f8c5933e9 100644
--- a/src/Tools/tnl-cuda-arch.cu
+++ b/src/Tools/tnl-cuda-arch.cu
@@ -1,6 +1,8 @@
 #include <stdio.h> 
+#include <string.h>
-int main() {
+int main( int argc, char** argv )
     int num_devices = 0;
     cudaError_t error_id = cudaGetDeviceCount( &num_devices );
@@ -21,8 +23,15 @@ int main() {
             if( i > 0 )
                 printf(" ");
-            printf( "-gencode arch=compute_%d%d,code=sm_%d%d",
-                    prop.major, compute_minor, prop.major, prop.minor );
+            if( argc == 2 && strcmp( argv[1], "--clang" ) == 0 ) {
+                printf( "--cuda-gpu-arch=sm_%d%d",
+                        prop.major, prop.minor );
+            }
+            else {
+                printf( "-gencode arch=compute_%d%d,code=sm_%d%d",
+                        prop.major, compute_minor, prop.major, prop.minor );
+            }
diff --git a/src/Tools/tnl-diff.cpp b/src/Tools/tnl-diff.cpp
index 76c2e792bc5e7b2a8f09304ac038334506c7e586..013f7bf21a2218f940a037ca94e39709a14b0505 100644
--- a/src/Tools/tnl-diff.cpp
+++ b/src/Tools/tnl-diff.cpp
@@ -60,7 +60,7 @@ int main( int argc, char* argv[] )
    if( ! parseObjectType( meshType, parsedMeshType ) )
       std::cerr << "Unable to parse the mesh type " << meshType << "." << std::endl;
-      return false;
+      return EXIT_FAILURE;
    if( parsedMeshType[ 0 ] == "Meshes::Grid" ||
        parsedMeshType[ 0 ] == "tnlGrid" )        // TODO: remove deprecated type name
diff --git a/src/Tools/tnl-init.cpp b/src/Tools/tnl-init.cpp
index 90870bad68daa3761c20c44b2d04e24ad22c5677..f42b9ad3c9aeb02690ef4f7def74acf6c2ce3087 100644
--- a/src/Tools/tnl-init.cpp
+++ b/src/Tools/tnl-init.cpp
@@ -65,7 +65,7 @@ int main( int argc, char* argv[] )
    if( ! parseObjectType( meshType, parsedMeshType ) )
       std::cerr << "Unable to parse the mesh type " << meshType << "." << std::endl;
-      return false;
+      return EXIT_FAILURE;
    if( ! resolveMeshType( parsedMeshType, parameters ) )
       return EXIT_FAILURE;
diff --git a/src/Tools/tnl-init.h b/src/Tools/tnl-init.h
index 182f51b28536b06ac86d5e76c3d997e01367c83a..355c553f02695d28f45c3994ef80891ead05b4cd 100644
--- a/src/Tools/tnl-init.h
+++ b/src/Tools/tnl-init.h
@@ -34,14 +34,14 @@ bool renderFunction( const Config::ParameterContainer& parameters )
    if( ! meshPointer->load( meshFile ) )
       return false;
-   typedef Functions::TestFunction< MeshType::meshDimension, RealType > FunctionType;
+   typedef Functions::TestFunction< MeshType::getMeshDimension(), RealType > FunctionType;
    typedef SharedPointer< FunctionType, typename MeshType::DeviceType > FunctionPointer;
    FunctionPointer function;
    std::cout << "Setting up the function ... " << std::endl;
    if( ! function->setup( parameters, "" ) )
       return false;
    std::cout << "done." << std::endl;
-   typedef Functions::MeshFunction< MeshType, MeshType::meshDimension > MeshFunctionType;
+   typedef Functions::MeshFunction< MeshType, MeshType::getMeshDimension() > MeshFunctionType;
    typedef SharedPointer< MeshFunctionType, typename MeshType::DeviceType > MeshFunctionPointer;
    MeshFunctionPointer meshFunction( meshPointer );
    //if( ! discreteFunction.setSize( mesh.template getEntitiesCount< typename MeshType::Cell >() ) )
diff --git a/src/Tools/tnl-mesh-convert.cpp b/src/Tools/tnl-mesh-convert.cpp
index 84cfd7fbaa57c10679127a365383494ac3f8dd29..d7de402bc72bc3cc84bd03452837cb7bff40833d 100644
--- a/src/Tools/tnl-mesh-convert.cpp
+++ b/src/Tools/tnl-mesh-convert.cpp
@@ -8,7 +8,7 @@
 /* See Copyright Notice in tnl/Copyright */
-#ifndef HAVE_ICPC
 #include "tnl-mesh-convert.h"
 #include <TNL/Config/ParameterContainer.h>
@@ -35,7 +35,7 @@ int main( int argc, char* argv[] )
       conf_desc.printUsage( argv[ 0 ] );
       return EXIT_FAILURE;
-#ifndef HAVE_ICPC
    if( ! convertMesh( parameters ) )
       return EXIT_FAILURE;
diff --git a/src/Tools/tnl-quickstart/Makefile.in b/src/Tools/tnl-quickstart/Makefile.in
index 48f3908b58d9888aa6cda6959ba65c96745793c7..c2954e00e8f855f14bd52eda44fc77f95c8a9416 100644
--- a/src/Tools/tnl-quickstart/Makefile.in
+++ b/src/Tools/tnl-quickstart/Makefile.in
@@ -1,6 +1,3 @@
-# Uncomment the following line to enable CUDA
-#WITH_CUDA = yes
 TARGET = {problemBaseName}
 INSTALL_DIR = ${{HOME}}/local
@@ -16,11 +13,12 @@ endif
 SOURCES = {problemBaseName}.cpp
 HEADERS = {problemBaseName}.h
-OBJECTS = {problemBaseName}.o
 ifdef WITH_CUDA
    OBJECTS = {problemBaseName}-cuda.o
+   OBJECTS = {problemBaseName}.o
 all: $(TARGET)
diff --git a/src/Tools/tnl-quickstart/explicit-laplace-grid-1d_impl.h.in b/src/Tools/tnl-quickstart/explicit-laplace-grid-1d_impl.h.in
index d952cb637c3ca0afdedd577a7b6341d65c8e211e..1e1eaa44a175c7688f1b6864e9f7e5ffaea1fa5e 100644
--- a/src/Tools/tnl-quickstart/explicit-laplace-grid-1d_impl.h.in
+++ b/src/Tools/tnl-quickstart/explicit-laplace-grid-1d_impl.h.in
@@ -1,5 +1,5 @@
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-   const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); 
+   const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+   const IndexType& west = neighborEntities.template getEntityIndex< -1 >(); 
    return ( u[ west ] - 2.0 * u[ center ]  + u[ east ] ) * hxSquareInverse;
\ No newline at end of file
diff --git a/src/Tools/tnl-quickstart/explicit-laplace-grid-2d_impl.h.in b/src/Tools/tnl-quickstart/explicit-laplace-grid-2d_impl.h.in
index ad817b3b31fc422759a76d4093336e67dee6a6dc..8c3b7d796ce12d8bb96fda55e47d55568bd8d5bd 100644
--- a/src/Tools/tnl-quickstart/explicit-laplace-grid-2d_impl.h.in
+++ b/src/Tools/tnl-quickstart/explicit-laplace-grid-2d_impl.h.in
@@ -1,9 +1,9 @@
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); 
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();         
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();         
    return ( u[ west ] - 2.0 * u[ center ] + u[ east ]  ) * hxSquareInverse +
           ( u[ south ] - 2.0 * u[ center ] + u[ north ] ) * hySquareInverse;
\ No newline at end of file
diff --git a/src/Tools/tnl-quickstart/explicit-laplace-grid-3d_impl.h.in b/src/Tools/tnl-quickstart/explicit-laplace-grid-3d_impl.h.in
index 7b2f234c2037ee8de516d2d04a0877e307e64e12..aa6ff5f3f7fcc731c57667cc3e12c52ca0087218 100644
--- a/src/Tools/tnl-quickstart/explicit-laplace-grid-3d_impl.h.in
+++ b/src/Tools/tnl-quickstart/explicit-laplace-grid-3d_impl.h.in
@@ -2,12 +2,12 @@
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >(); 
    const RealType& hzSquareInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();         
-   const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-   const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();         
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();         
+   const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+   const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();         
    return ( u[ west ] - 2.0 * u[ center ] + u[ east ]  ) * hxSquareInverse +
           ( u[ south ] - 2.0 * u[ center ] + u[ north ] ) * hySquareInverse +
           ( u[ up ] - 2.0 * u[ center ] + u[ down ] ) * hzSquareInverse;
\ No newline at end of file
diff --git a/src/Tools/tnl-quickstart/implicit-laplace-grid-1d_impl.h.in b/src/Tools/tnl-quickstart/implicit-laplace-grid-1d_impl.h.in
index de086c72e305d76b2130e0a2e09da73f68c40fd1..e5a139e22c202672510f59b607c5d099d2772c4e 100644
--- a/src/Tools/tnl-quickstart/implicit-laplace-grid-1d_impl.h.in
+++ b/src/Tools/tnl-quickstart/implicit-laplace-grid-1d_impl.h.in
@@ -1,7 +1,7 @@
    const RealType& lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-   const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); 
+   const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+   const IndexType& west = neighborEntities.template getEntityIndex< -1 >(); 
    matrixRow.setElement( 0, west,   - lambdaX );
    matrixRow.setElement( 1, center, 2.0 * lambdaX );
    matrixRow.setElement( 2, east,   - lambdaX );
\ No newline at end of file
diff --git a/src/Tools/tnl-quickstart/implicit-laplace-grid-2d_impl.h.in b/src/Tools/tnl-quickstart/implicit-laplace-grid-2d_impl.h.in
index 048949469ddff7d49bbd7ca82e74ff322fa789cd..808d829a88028d9e75c6a6bbaa1b092d3ce4d361 100644
--- a/src/Tools/tnl-quickstart/implicit-laplace-grid-2d_impl.h.in
+++ b/src/Tools/tnl-quickstart/implicit-laplace-grid-2d_impl.h.in
@@ -1,10 +1,10 @@
    const RealType& lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); 
    const RealType& lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >();         
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >();         
    matrixRow.setElement( 0, south,  -lambdaY );
    matrixRow.setElement( 1, west,   -lambdaX );
    matrixRow.setElement( 2, center, 2.0 * ( lambdaX + lambdaY ) );
diff --git a/src/Tools/tnl-quickstart/implicit-laplace-grid-3d_impl.h.in b/src/Tools/tnl-quickstart/implicit-laplace-grid-3d_impl.h.in
index 2343334c4044143815dc5ebfad19fc2c26e767c3..7bbec43866834f69db96e250aaaec9af8c717586 100644
--- a/src/Tools/tnl-quickstart/implicit-laplace-grid-3d_impl.h.in
+++ b/src/Tools/tnl-quickstart/implicit-laplace-grid-3d_impl.h.in
@@ -2,12 +2,12 @@
    const RealType& lambdaY = tau * entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >(); 
    const RealType& lambdaZ = tau * entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >();         
-   const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-   const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >();                 
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >();         
+   const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+   const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >();                 
    matrixRow.setElement( 0, down,   -lambdaZ );
    matrixRow.setElement( 1, south,  -lambdaY );
    matrixRow.setElement( 2, west,   -lambdaX );
diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in
index 1714c59b7395e62486d53775e424d04fa8f7a7eb..3e7d0d670f5de48659c146bbf7b1fb62287e4a38 100644
--- a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in
+++ b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in
@@ -36,7 +36,7 @@ operator()( const MeshFunction& u,
    static_assert( MeshEntity::entityDimension == {meshDimension}, "Wrong mesh entity dimensions." );
    static_assert( MeshFunction::getEntitiesDimension() == {meshDimension}, "Wrong preimage function" );
-   const typename MeshEntity::template NeighbourEntities< {meshDimension} >& neighbourEntities = entity.getNeighbourEntities(); 
+   const typename MeshEntity::template NeighborEntities< {meshDimension} >& neighborEntities = entity.getNeighborEntities(); 
@@ -92,7 +92,7 @@ setMatrixElements( const PreimageFunction& u,
     * The following example is the Laplace operator approximated 
     * by the Finite difference method.
-   const typename MeshEntity::template NeighbourEntities< {meshDimension} >& neighbourEntities = entity.getNeighbourEntities();
+   const typename MeshEntity::template NeighborEntities< {meshDimension} >& neighborEntities = entity.getNeighborEntities();
    const IndexType& index = entity.getIndex();
    typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
diff --git a/src/Tools/tnl-quickstart/tnl-quickstart.py b/src/Tools/tnl-quickstart/tnl-quickstart.py
index 5992503861af6ce1faa274a0796237847e2ff53e..f456952696ad52d06fb7e3806e885dbc62d33be1 100644
--- a/src/Tools/tnl-quickstart/tnl-quickstart.py
+++ b/src/Tools/tnl-quickstart/tnl-quickstart.py
@@ -18,7 +18,7 @@ print( "----------------------------------")
 definitions = {}
-definitions['problemName'] = input( "Problam name:" )
+definitions['problemName'] = input( "Problem name:" )
 definitions['problemBaseName'] = input( "Problem class base name (base name acceptable in C++ code):" )
 definitions['operatorName'] = input( "Operator name:")
diff --git a/src/Tools/tnl-view.cpp b/src/Tools/tnl-view.cpp
index 3f4e19304eda56866e0350e888c728aa8457b87f..4a5865ac610e5d0713899f9a72678ab878f13d76 100644
--- a/src/Tools/tnl-view.cpp
+++ b/src/Tools/tnl-view.cpp
@@ -75,7 +75,7 @@ int main( int argc, char* argv[] )
    if( ! parseObjectType( meshType, parsedMeshType ) )
       std::cerr << "Unable to parse the mesh type " << meshType << "." << std::endl;
-      return false;
+      return EXIT_FAILURE;
    if( parsedMeshType[ 0 ] == "Meshes::Grid" ||
        parsedMeshType[ 0 ] == "tnlGrid" )   //  TODO: remove deprecated type name
diff --git a/src/Tools/tnl-view.h b/src/Tools/tnl-view.h
index f4e6542ecbacc9e084ac145df56dbe4938a3bcab..c4d25f1d5520d2886be390bc41763c1f338e45aa 100644
--- a/src/Tools/tnl-view.h
+++ b/src/Tools/tnl-view.h
@@ -282,12 +282,12 @@ bool convertObject( const MeshPointer& meshPointer,
        parsedObjectType[ 0 ] == "tnlVector" )          //
       using MeshType = typename MeshPointer::ObjectType;
-      // FIXME: why is MeshType::IndexType not the same as Index?
+      // FIXME: why is MeshType::GlobalIndexType not the same as Index?
 //      Containers::Vector< Element, Devices::Host, Index > vector;
-      Containers::Vector< Element, Devices::Host, typename MeshType::IndexType > vector;
+      Containers::Vector< Element, Devices::Host, typename MeshType::GlobalIndexType > vector;
       if( ! vector.load( inputFileName ) )
          return false;
-      Functions::MeshFunction< MeshType, MeshType::meshDimension, Element > mf;
+      Functions::MeshFunction< MeshType, MeshType::getMeshDimension(), Element > mf;
       mf.bind( meshPointer, vector );
       if( ! mf.write( outputFileName, outputFormat ) )
          return false;
diff --git a/src/Tools/tnlcurve2gnuplot.cpp b/src/Tools/tnlcurve2gnuplot.cpp
index 16f7f372f37ea7881506aac6d7894036676a0256..5a1e297d60973b8a1a6f8f7f053732cdcce9fb0c 100644
--- a/src/Tools/tnlcurve2gnuplot.cpp
+++ b/src/Tools/tnlcurve2gnuplot.cpp
@@ -58,7 +58,7 @@ int main( int argc, char* argv[] )
       std::cout << "Processing file " << input_file << " ... " << std::flush;
       File file;
-      if( ! file. open( input_files[ i ], tnlReadMode ) )
+      if( ! file. open( input_files[ i ], IOMode::read ) )
          std::cout << " unable to open file " << input_files[ i ] << std::endl;
diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt
old mode 100755
new mode 100644
index ac80ff9471a888c101a4d4f699eb04c1734e3427..f702830a06cc20b76171ec10fffbe9a89711cce4
--- a/src/UnitTests/CMakeLists.txt
+++ b/src/UnitTests/CMakeLists.txt
@@ -1,39 +1,43 @@
-#ADD_SUBDIRECTORY( Containers )
-ADD_EXECUTABLE( UniquePointerTest${mpiExt}${debugExt} ${headers} UniquePointerTest.cpp )
-TARGET_LINK_LIBRARIES( UniquePointerTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_EXECUTABLE( UniquePointerTest${mpiExt}${debugExt} UniquePointerTest.cpp )
+TARGET_COMPILE_OPTIONS( UniquePointerTest${mpiExt}${debugExt} PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( UniquePointerTest${mpiExt}${debugExt}
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-   CUDA_ADD_EXECUTABLE( FileTest${mpiExt}${debugExt} FileTest.h FileTest.cu )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+   CUDA_ADD_EXECUTABLE( FileTest${mpiExt}${debugExt} FileTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( FileTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
 ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE( FileTest${mpiExt}${debugExt} FileTest.h FileTest.cpp )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+   ADD_EXECUTABLE( FileTest${mpiExt}${debugExt} FileTest.cpp )
+   TARGET_LINK_LIBRARIES( FileTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
-ADD_EXECUTABLE( StringTest${mpiExt}${debugExt} ${headers} StringTest.cpp )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ADD_EXECUTABLE( ListTest${mpiExt}${debugExt} ${headers} ListTest.cpp )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_EXECUTABLE( StringTest${mpiExt}${debugExt} StringTest.cpp )
+TARGET_LINK_LIBRARIES( StringTest${mpiExt}${debugExt}
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ADD_EXECUTABLE( ObjectTest${mpiExt}${debugExt} ${headers} ObjectTest.cpp )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_EXECUTABLE( ObjectTest${mpiExt}${debugExt} ObjectTest.cpp )
+TARGET_LINK_LIBRARIES( ObjectTest${mpiExt}${debugExt}
+                           ${GTEST_BOTH_LIBRARIES} 
+                           tnl${mpiExt}${debugExt}-${tnlVersion} )
 ADD_TEST( FileTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/FileTest${mpiExt}${debugExt} )
 ADD_TEST( StringTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/StringTest${mpiExt}${debugExt} )
-ADD_TEST( ListTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/ListTest${mpiExt}${debugExt} )
-ADD_TEST( ObjectTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${mpiExt}${debugExt} )
\ No newline at end of file
+ADD_TEST( ObjectTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${mpiExt}${debugExt} )
+ADD_TEST( UniquePointerTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/UniquePointerTest${mpiExt}${debugExt} )
+endif( WITH_TESTS STREQUAL "yes" )
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cpp b/src/UnitTests/Containers/ArrayOperationsTest.cpp
index 92c24428b74513d8bda61db2fe5095a7d4e4ff8a..c499a61b2cb1b50eebf9fc7fedacf56b9c7cb68a 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.cpp
+++ b/src/UnitTests/Containers/ArrayOperationsTest.cpp
@@ -9,5 +9,3 @@
 /* See Copyright Notice in tnl/Copyright */
 #include "ArrayOperationsTest.h"
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h
index d11489d4d561abaf77acadcbe31870cb5ce63a6f..be75e04fe737e6cb4c4ac032af3656c48623211e 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Containers/ArrayOperationsTest.h
@@ -1,5 +1,5 @@
-                          ArrayOperationsTester.h  -  description
+                          ArrayOperationsTest.h  -  description
     begin                : Jul 15, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -10,304 +10,310 @@
 #pragma once
+#ifdef HAVE_GTEST 
 #include <TNL/Containers/Algorithms/ArrayOperations.h>
 #include <TNL/Devices/Cuda.h>
-#ifdef HAVE_GTEST 
 #include "gtest/gtest.h"
 using namespace TNL;
 using namespace TNL::Containers;
 using namespace TNL::Containers::Algorithms;
-int getTestSize()
+constexpr int ARRAY_TEST_SIZE = 5000;
+// test fixture for typed tests
+template< typename Element >
+class ArrayOperationsTest : public ::testing::Test
-   return 1 << 16;
-   //const int cudaGridSize = 256;
-   //return 1.5 * cudaGridSize * maxCudaBlockSize;
-   //return  1 << 22;
+   using ElementType = Element;
-typedef int Element;
+// types for which ArrayTest is instantiated
+using ElementTypes = ::testing::Types< short int, int, long, float, double >;
-#ifdef HAVE_GTEST
+TYPED_TEST_CASE( ArrayOperationsTest, ElementTypes );
-TEST( ArrayOperationsTest, allocationTest )
+TYPED_TEST( ArrayOperationsTest, allocateMemory_host )
-   Element* data;
-   ArrayOperations< Devices::Host >::allocateMemory( data, getTestSize() );
-   ASSERT_EQ( data, ( Element* ) NULL );
+   using ElementType = typename TestFixture::ElementType;
+   ElementType* data;
+   ArrayOperations< Devices::Host >::allocateMemory( data, ARRAY_TEST_SIZE );
+   ASSERT_NE( data, nullptr );
    ArrayOperations< Devices::Host >::freeMemory( data );
-TEST( ArrayOperationsTest, memorySetTest )
+TYPED_TEST( ArrayOperationsTest, setMemoryElement_host )
-   const int size = 1024;
-   Element *data;
-   ArrayOperations< Devices::Host > :: allocateMemory( data, size );
-   ArrayOperations< Devices::Host > :: setMemory( data, 13, size );
-   for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( data[ i ], 13 );
-   ArrayOperations< Devices::Host > :: freeMemory( data );
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
+   ElementType *data;
+   ArrayOperations< Devices::Host >::allocateMemory( data, size );
+   for( int i = 0; i < size; i++ ) {
+      ArrayOperations< Devices::Host >::setMemoryElement( data + i, (ElementType) i );
+      EXPECT_EQ( data[ i ], i );
+      EXPECT_EQ( ArrayOperations< Devices::Host >::getMemoryElement( data + i ), i );
+   }
+   ArrayOperations< Devices::Host >::freeMemory( data );
-TEST( ArrayOperationsTest, copyMemoryTest )
+TYPED_TEST( ArrayOperationsTest, setMemory_host )
-   const int size = getTestSize();
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
-   Element *data1, *data2;
-   ArrayOperations< Devices::Host > :: allocateMemory( data1, size );
-   ArrayOperations< Devices::Host > :: allocateMemory( data2, size );
-   ArrayOperations< Devices::Host > :: setMemory( data1, 13, size );
-   ArrayOperations< Devices::Host > :: copyMemory< Element, Element, int >( data2, data1, size );
+   ElementType *data;
+   ArrayOperations< Devices::Host >::allocateMemory( data, size );
+   ArrayOperations< Devices::Host >::setMemory( data, (ElementType) 13, size );
    for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( data1[ i ], data2[ i ]);
-   ArrayOperations< Devices::Host > :: freeMemory( data1 );
-   ArrayOperations< Devices::Host > :: freeMemory( data2 );
+      EXPECT_EQ( data[ i ], 13 );
+   ArrayOperations< Devices::Host >::freeMemory( data );
+TYPED_TEST( ArrayOperationsTest, copyMemory_host )
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
+   ElementType *data1, *data2;
+   ArrayOperations< Devices::Host >::allocateMemory( data1, size );
+   ArrayOperations< Devices::Host >::allocateMemory( data2, size );
+   ArrayOperations< Devices::Host >::setMemory( data1, (ElementType) 13, size );
+   ArrayOperations< Devices::Host >::copyMemory< ElementType, ElementType >( data2, data1, size );
+   for( int i = 0; i < size; i ++ )
+      EXPECT_EQ( data1[ i ], data2[ i ]);
+   ArrayOperations< Devices::Host >::freeMemory( data1 );
+   ArrayOperations< Devices::Host >::freeMemory( data2 );
-TEST( ArrayOperationsTest, copyMemoryWithConversionTest )
+TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversion_host )
-   const int size = getTestSize();
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
    int *data1;
    float *data2;
-   ArrayOperations< Devices::Host > :: allocateMemory( data1, size );
-   ArrayOperations< Devices::Host > :: allocateMemory( data2, size );
-   ArrayOperations< Devices::Host > :: setMemory( data1, 13, size );
-   ArrayOperations< Devices::Host > :: copyMemory< float, int, int >( data2, data1, size );
+   ArrayOperations< Devices::Host >::allocateMemory( data1, size );
+   ArrayOperations< Devices::Host >::allocateMemory( data2, size );
+   ArrayOperations< Devices::Host >::setMemory( data1, 13, size );
+   ArrayOperations< Devices::Host >::copyMemory< float, int >( data2, data1, size );
    for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( data1[ i ], data2[ i ] );
-   ArrayOperations< Devices::Host > :: freeMemory( data1 );
-   ArrayOperations< Devices::Host > :: freeMemory( data2 );
+      EXPECT_EQ( data1[ i ], data2[ i ] );
+   ArrayOperations< Devices::Host >::freeMemory( data1 );
+   ArrayOperations< Devices::Host >::freeMemory( data2 );
-TEST( ArrayOperationsTest, compareMemoryTest )
+TYPED_TEST( ArrayOperationsTest, compareMemory_host )
-   const int size = getTestSize();
-   int *data1, *data2;
-   ArrayOperations< Devices::Host > :: allocateMemory( data1, size );
-   ArrayOperations< Devices::Host > :: allocateMemory( data2, size );
-   ArrayOperations< Devices::Host > :: setMemory( data1, 7, size );
-   ASSERT_FALSE( ( ArrayOperations< Devices::Host > :: compareMemory< int, int, int >( data1, data2, size ) ) );
-   ArrayOperations< Devices::Host > :: setMemory( data2, 7, size );
-   ASSERT_TRUE( ( ArrayOperations< Devices::Host > :: compareMemory< int, int, int >( data1, data2, size ) ) );
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
+   ElementType *data1, *data2;
+   ArrayOperations< Devices::Host >::allocateMemory( data1, size );
+   ArrayOperations< Devices::Host >::allocateMemory( data2, size );
+   ArrayOperations< Devices::Host >::setMemory( data1, (ElementType) 7, size );
+   ArrayOperations< Devices::Host >::setMemory( data2, (ElementType) 0, size );
+   EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compareMemory< ElementType, ElementType >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::setMemory( data2, (ElementType) 7, size );
+   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< ElementType, ElementType >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::freeMemory( data1 );
+   ArrayOperations< Devices::Host >::freeMemory( data2 );
-TEST( ArrayOperationsTest, compareMemoryWithConversionTest )
+TYPED_TEST( ArrayOperationsTest, compareMemoryWithConversion_host )
-   const int size = getTestSize();
+   const int size = ARRAY_TEST_SIZE;
    int *data1;
    float *data2;
-   ArrayOperations< Devices::Host > :: allocateMemory( data1, size );
-   ArrayOperations< Devices::Host > :: allocateMemory( data2, size );
-   ArrayOperations< Devices::Host > :: setMemory( data1, 7, size );
-   ASSERT_FALSE( ( ArrayOperations< Devices::Host > :: compareMemory< int, float, int >( data1, data2, size ) ) );
-   ArrayOperations< Devices::Host > :: setMemory( data2, ( float ) 7.0, size );
-   ASSERT_TRUE( ( ArrayOperations< Devices::Host > :: compareMemory< int, float, int >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::allocateMemory( data1, size );
+   ArrayOperations< Devices::Host >::allocateMemory( data2, size );
+   ArrayOperations< Devices::Host >::setMemory( data1, 7, size );
+   ArrayOperations< Devices::Host >::setMemory( data2, (float) 0.0, size );
+   EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compareMemory< int, float >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::setMemory( data2, (float) 7.0, size );
+   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< int, float >( data1, data2, size ) ) );
+   ArrayOperations< Devices::Host >::freeMemory( data1 );
+   ArrayOperations< Devices::Host >::freeMemory( data2 );
 #ifdef HAVE_CUDA
-TEST( ArrayOperationsTest, allocationTest )
+TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda )
-   int* data;
-   ArrayOperations< Devices::Cuda >::allocateMemory( data, getTestSize() );
-   ASSERT_TRUE( checkCudaDevice );
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
-   ArrayOperations< Devices::Cuda >::freeMemory( data );
-   ASSERT_TRUE( checkCudaDevice );
-TEST( ArrayOperationsTest, setMemoryElementTest )
-   const int size( 1024 );
-   int* data;
+   ElementType* data;
    ArrayOperations< Devices::Cuda >::allocateMemory( data, size );
-   ASSERT_TRUE( checkCudaDevice );
-   for( int i = 0; i < getTestSize(); i++ )
-      ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], i );
-   for( int i = 0; i < size; i++ )
-   {
-      int d;
-      ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( int ), cudaMemcpyDeviceToHost ), cudaSuccess );
-      ASSERT_EQ( d, i );
-   }
+   ASSERT_NE( data, nullptr );
    ArrayOperations< Devices::Cuda >::freeMemory( data );
-   ASSERT_TRUE( checkCudaDevice );
-TEST( ArrayOperationsTest, getMemoryElementTest )
+TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda )
-   const int size( 1024 );
-   int* data;
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
+   ElementType* data;
    ArrayOperations< Devices::Cuda >::allocateMemory( data, size );
-   ASSERT_TRUE( checkCudaDevice );
-   for( int i = 0; i < getTestSize(); i++ )
-      ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], i );
+   for( int i = 0; i < size; i++ )
+      ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], (ElementType) i );
    for( int i = 0; i < size; i++ )
-      ASSERT_EQ( ( ArrayOperations< Devices::Cuda >::getMemoryElement( &data[ i ] ), i ) );
+   {
+      ElementType d;
+      ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( ElementType ), cudaMemcpyDeviceToHost ), cudaSuccess );
+      EXPECT_EQ( d, i );
+      EXPECT_EQ( ArrayOperations< Devices::Cuda >::getMemoryElement( &data[ i ] ), i );
+   }
    ArrayOperations< Devices::Cuda >::freeMemory( data );
-   ASSERT_TRUE( checkCudaDevice );
-TEST( ArrayOperationsTest, smallMemorySetTest )
+TYPED_TEST( ArrayOperationsTest, setMemory_cuda )
-   const int size = 1024;
-   int *hostData, *deviceData;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData, 0, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, 13, size );
-   ASSERT_TRUE( checkCudaDevice );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int, int >( hostData, deviceData, size );
-   ASSERT_TRUE( checkCudaDevice );
-   for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( hostData[ i ], 13 );
-   ArrayOperations< Devices::Cuda >::freeMemory( hostData );
-   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
-TEST( ArrayOperationsTest, bigMemorySetTest )
-   const int size( getTestSize() );
-   int *hostData, *deviceData;
+   ElementType *hostData, *deviceData;
    ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData, 0, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, 13, size );
-   ASSERT_TRUE( checkCudaDevice );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int, int >( hostData, deviceData, size );
-   ASSERT_TRUE( checkCudaDevice );
-   for( int i = 0; i < size; i += 100 )
-   {
-      if( hostData[ i ] != 13 )
-      ASSERT_EQ( hostData[ i ], 13 );
-   }
+   ArrayOperations< Devices::Host >::setMemory( hostData, (ElementType) 0, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ElementType) 13, size );
+   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ElementType, ElementType >( hostData, deviceData, size );
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( hostData[ i ], 13 );
    ArrayOperations< Devices::Host >::freeMemory( hostData );
    ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
-TEST( ArrayOperationsTest, copyMemoryTest )
+TYPED_TEST( ArrayOperationsTest, copyMemory_cuda )
-   const int size = getTestSize();
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
-   int *hostData1, *hostData2, *deviceData;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData1, size );
+   ElementType *hostData, *hostData2, *deviceData, *deviceData2;
+   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData1, 13, size );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int, int >( deviceData, hostData1, size );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int, int >( hostData2, deviceData, size );
-   ASSERT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< int, int >( hostData1, hostData2, size) ) );
-   ArrayOperations< Devices::Host >::freeMemory( hostData1 );
+   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
+   ArrayOperations< Devices::Host >::setMemory( hostData, (ElementType) 13, size );
+   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< ElementType >( deviceData, hostData, size );
+   ArrayOperations< Devices::Cuda >::copyMemory< ElementType, ElementType >( deviceData2, deviceData, size );
+   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ElementType, ElementType >( hostData2, deviceData2, size );
+   EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compareMemory< ElementType, ElementType >( hostData, hostData2, size) ) );
+   ArrayOperations< Devices::Host >::freeMemory( hostData );
    ArrayOperations< Devices::Host >::freeMemory( hostData2 );
    ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
+   ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 );
-TEST( ArrayOperationsTest, copyMemoryWithConversionHostToCudaTest )
+TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversions_cuda )
-   const int size = getTestSize();
-   int *hostData1;
-   float *hostData2, *deviceData;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData1, size );
-   ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData1, 13, size );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float, int, int >( deviceData, hostData1, size );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float, float, int >( hostData2, deviceData, size );
-   for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( hostData1[ i ], hostData2[ i ] );
-   ArrayOperations< Devices::Host >::freeMemory( hostData1 );
-   ArrayOperations< Devices::Host >::freeMemory( hostData2 );
-   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
+   const int size = ARRAY_TEST_SIZE;
-TEST( ArrayOperationsTest, copyMemoryWithConversionCudaToHostTest )
-   const int size = getTestSize();
-   int *hostData1, *deviceData;
-   float *hostData2;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData1, size );
+   int *hostData;
+   double *hostData2;
+   long *deviceData;
+   float *deviceData2;
+   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData1, 13, size );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int, int >( deviceData, hostData1, size );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float, int, int >( hostData2, deviceData, size );
-   for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( hostData1[ i ], hostData2[ i ] );
-   ArrayOperations< Devices::Host >::freeMemory( hostData1 );
-   ArrayOperations< Devices::Host >::freeMemory( hostData2 );
-   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
-TEST( ArrayOperationsTest, copyMemoryWithConversionCudaToCudaTest )
-   const int size = getTestSize();
-   int *hostData1, *deviceData1;
-   float *hostData2, *deviceData2;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData1, size );
-   ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData1, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData1, 13, size );
-   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int, int, int >( deviceData1, hostData1, size );
-   ArrayOperations< Devices::Cuda >::copyMemory< float, int, int >( deviceData2, deviceData1, size );
-   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float, float, int >( hostData2, deviceData2, size );
+   ArrayOperations< Devices::Host >::setMemory( hostData, 13, size );
+   ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long, int >( deviceData, hostData, size );
+   ArrayOperations< Devices::Cuda >::copyMemory< float, long >( deviceData2, deviceData, size );
+   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double, float >( hostData2, deviceData2, size );
    for( int i = 0; i < size; i ++ )
-      ASSERT_EQ( hostData1[ i ], hostData2[ i ] );
-   ArrayOperations< Devices::Host >::freeMemory( hostData1 );
+      EXPECT_EQ( hostData[ i ], hostData2[ i ] );
+   ArrayOperations< Devices::Host >::freeMemory( hostData );
    ArrayOperations< Devices::Host >::freeMemory( hostData2 );
-   ArrayOperations< Devices::Cuda >::freeMemory( deviceData1 );
+   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
    ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 );
-TEST( ArrayOperationsTest, compareMemoryHostCudaTest )
+TYPED_TEST( ArrayOperationsTest, compareMemory_cuda )
-   const int size = getTestSize();
-   int *hostData, *deviceData;
+   using ElementType = typename TestFixture::ElementType;
+   const int size = ARRAY_TEST_SIZE;
+   ElementType *hostData, *deviceData, *deviceData2;
    ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData, 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, 8, size );
-   ASSERT_FALSE( ( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, int, int >( hostData, deviceData, size ) ) );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, 7, size );
-   ASSERT_TRUE( ( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, int, int >( hostData, deviceData, size ) ) );
+   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
+   ArrayOperations< Devices::Host >::setMemory( hostData, (ElementType) 7, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ElementType) 8, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ElementType) 9, size );
+   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ElementType, ElementType >( hostData, deviceData, size ) ));
+   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ElementType, ElementType >( deviceData, hostData, size ) ));
+   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ElementType, ElementType >( deviceData, deviceData2, size ) ));
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ElementType) 7, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ElementType) 7, size );
+   EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ElementType, ElementType >( hostData, deviceData, size ) ));
+   EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ElementType, ElementType >( deviceData, hostData, size ) ));
+   EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compareMemory< ElementType, ElementType >( deviceData, deviceData2, size ) ));
-TEST( ArrayOperationsTest, compareMemoryWithConversionHostCudaTest )
+   ArrayOperations< Devices::Host >::freeMemory( hostData );
+   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
+   ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 );
+TYPED_TEST( ArrayOperationsTest, compareMemoryWithConversions_cuda )
-   const int size = getTestSize();
+   const int size = ARRAY_TEST_SIZE;
    int *hostData;
    float *deviceData;
+   double *deviceData2;
    ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
+   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
    ArrayOperations< Devices::Host >::setMemory( hostData, 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, ( float ) 8.0, size );
-   ASSERT_FALSE( ( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, float, int >( hostData, deviceData, size ) ) );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, ( float ) 7.0, size );
-   ASSERT_TRUE( ( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, float, int >( hostData, deviceData, size ) ) );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (float) 8, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (double) 9, size );
+   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, float >( hostData, deviceData, size ) ));
+   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float, int >( deviceData, hostData, size ) ));
+   // TODO: missing implementation of relevant reduction operation on CUDA with different types
+//   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< float, double >( deviceData, deviceData2, size ) ));
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (float) 7, size );
+   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (double) 7, size );
+   EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, float >( hostData, deviceData, size ) ));
+   EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float, int >( deviceData, hostData, size ) ));
+   // TODO: missing implementation of relevant reduction operation on CUDA with different types
+//   EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compareMemory< float, double >( deviceData, deviceData2, size ) ));
+   ArrayOperations< Devices::Host >::freeMemory( hostData );
+   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
+   ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 );
 #endif // HAVE_CUDA
 #endif // HAVE_GTEST
+#include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h
index a930efdbb5e5c15b8fb317c21879864d4e5b1d12..7b9bc2f5659644199c91c3a291857cc5e18e2564 100644
--- a/src/UnitTests/Containers/ArrayTest.h
+++ b/src/UnitTests/Containers/ArrayTest.h
@@ -1,5 +1,5 @@
-                          ArrayTester.h -  description
+                          ArrayTest.h -  description
     begin                : Jul 4, 2012
     copyright            : (C) 2012 by Tomas Oberhuber
@@ -10,255 +10,522 @@
 #pragma once
+#ifdef HAVE_GTEST 
+#include <type_traits>
 #include <TNL/Containers/Array.h>
-#include <TNL/Devices/Host.h>
-#ifdef HAVE_GTEST 
 #include "gtest/gtest.h"
 using namespace TNL;
 using namespace TNL::Containers;
-#ifdef HAVE_CUDA
-template< typename ElementType, typename IndexType >
-__global__ void testSetGetElementKernel( Array< ElementType, Devices::Cuda, IndexType >* u );
-class testingClassForArrayTester
+// minimal custom data structure usable as ElementType in Array
+struct MyData
-   public:
+   double data;
+   __cuda_callable__
+   MyData() : data(0) {}
+   template< typename T >
+   __cuda_callable__
+   MyData( T v ) : data(v) {}
-      static String getType()
-      {
-         return String( "testingClassForArrayTester" );
-      };
+   bool operator==( const MyData& v ) const { return data == v.data; }
+   // operator used in tests, not necessary for Array to work
+   template< typename T >
+   bool operator==( T v ) const { return data == v; }
+   static String getType()
+   {
+      return String( "MyData" );
+   }
-String getType( const testingClassForArrayTester& c )
+std::ostream& operator<<( std::ostream& str, const MyData& v )
-   return String( "testingClassForArrayTester" );
+   return str << v.data;
-#ifdef HAVE_GTEST
+// test fixture for typed tests
+template< typename Array >
+class ArrayTest : public ::testing::Test
+   using ArrayType = Array;
-// TODO: Fix this
+// types for which ArrayTest is instantiated
+using ArrayTypes = ::testing::Types<
+   Array< short,  Devices::Host, short >,
+   Array< int,    Devices::Host, short >,
+   Array< long,   Devices::Host, short >,
+   Array< float,  Devices::Host, short >,
+   Array< double, Devices::Host, short >,
+   Array< MyData, Devices::Host, short >,
+   Array< short,  Devices::Host, int >,
+   Array< int,    Devices::Host, int >,
+   Array< long,   Devices::Host, int >,
+   Array< float,  Devices::Host, int >,
+   Array< double, Devices::Host, int >,
+   Array< MyData, Devices::Host, int >,
+   Array< short,  Devices::Host, long >,
+   Array< int,    Devices::Host, long >,
+   Array< long,   Devices::Host, long >,
+   Array< float,  Devices::Host, long >,
+   Array< double, Devices::Host, long >,
+   Array< MyData, Devices::Host, long >
+   // FIXME: this segfaults in String::~String()
+//   , Array< String, Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,
+   Array< short,  Devices::Cuda, short >,
+   Array< int,    Devices::Cuda, short >,
+   Array< long,   Devices::Cuda, short >,
+   Array< float,  Devices::Cuda, short >,
+   Array< double, Devices::Cuda, short >,
+   Array< MyData, Devices::Cuda, short >,
+   Array< short,  Devices::Cuda, int >,
+   Array< int,    Devices::Cuda, int >,
+   Array< long,   Devices::Cuda, int >,
+   Array< float,  Devices::Cuda, int >,
+   Array< double, Devices::Cuda, int >,
+   Array< MyData, Devices::Cuda, int >,
+   Array< short,  Devices::Cuda, long >,
+   Array< int,    Devices::Cuda, long >,
+   Array< long,   Devices::Cuda, long >,
+   Array< float,  Devices::Cuda, long >,
+   Array< double, Devices::Cuda, long >,
+   Array< MyData, Devices::Cuda, long >
+#ifdef HAVE_MIC
+   ,
+   Array< short,  Devices::MIC, short >,
+   Array< int,    Devices::MIC, short >,
+   Array< long,   Devices::MIC, short >,
+   Array< float,  Devices::MIC, short >,
+   Array< double, Devices::MIC, short >,
+   // TODO: MyData does not work on MIC
+//   Array< MyData, Devices::MIC, short >,
+   Array< short,  Devices::MIC, int >,
+   Array< int,    Devices::MIC, int >,
+   Array< long,   Devices::MIC, int >,
+   Array< float,  Devices::MIC, int >,
+   Array< double, Devices::MIC, int >,
+   // TODO: MyData does not work on MIC
+//   Array< MyData, Devices::MIC, int >,
+   Array< short,  Devices::MIC, long >,
+   Array< int,    Devices::MIC, long >,
+   Array< long,   Devices::MIC, long >,
+   Array< float,  Devices::MIC, long >,
+   Array< double, Devices::MIC, long >
+   // TODO: MyData does not work on MIC
+//   Array< MyData, Devices::MIC, long >
-using ::testing::Types;
-typedef Types< int, Devices::Host, int > MyTypes;
-/*TYPED_TEST_CASE( ArrayTest, MyTypes );
+TYPED_TEST_CASE( ArrayTest, ArrayTypes );
-TYPED_TEST( ArrayTest, testConstructorDestructor )
+TYPED_TEST( ArrayTest, constructors )
-   typedef Array< TypeParam > ArrayType;
+   using ArrayType = typename TestFixture::ArrayType;
    ArrayType u;
+   EXPECT_EQ( u.getSize(), 0 );
    ArrayType v( 10 );
-   ASSERT_EQ( v.getSize(), 10 );
+   EXPECT_EQ( v.getSize(), 10 );
+   if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
+      typename ArrayType::ElementType data[ 10 ];
+      ArrayType w( data, 10 );
+      EXPECT_EQ( w.getData(), data );
+      ArrayType z1( w );
+      EXPECT_EQ( z1.getData(), data );
+      EXPECT_EQ( z1.getSize(), 10 );
+      ArrayType z2( w, 1 );
+      EXPECT_EQ( z2.getData(), data + 1 );
+      EXPECT_EQ( z2.getSize(), 9 );
+      ArrayType z3( w, 2, 3 );
+      EXPECT_EQ( z3.getData(), data + 2 );
+      EXPECT_EQ( z3.getSize(), 3 );
+   }
-TYPED_TEST( ArrayTest, testSetSize )
+TYPED_TEST( ArrayTest, setSize )
-   typedef Array< TypeParam > ArrayType;
-   ArrayType u, v;
-   u.setSize( 10 );
-   v.setSize( 10 );
-   ASSERT_EQ( u.getSize(), 10 );
-   ASSERT_EQ( v.getSize(), 10 );
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u;
+   const int maxSize = 10;
+   for( int i = 0; i <= maxSize; i ++ ) {
+      u.setSize( i );
+      EXPECT_EQ( u.getSize(), i );
+   }
+   ArrayType v( u );
+   EXPECT_EQ( v.getSize(), 10 );
+   EXPECT_EQ( v.getData(), u.getData() );
+   v.setSize( 11 );
+   EXPECT_EQ( u.getSize(), 10 );
+   EXPECT_EQ( v.getSize(), 11 );
+   EXPECT_NE( v.getData(), u.getData() );
+   // cast to bool returns true iff size > 0
+   EXPECT_TRUE( (bool) u );
+   EXPECT_FALSE( ! u );
+   u.setSize( 0 );
+   EXPECT_FALSE( (bool) u );
+   EXPECT_TRUE( ! u );
-TYPED_TEST( ArrayTest, testBind )
+TYPED_TEST( ArrayTest, setLike )
-   typedef Array< TypeParam > ArrayType;
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u( 10 );
+   EXPECT_EQ( u.getSize(), 10 );
+   ArrayType v;
+   v.setLike( u );
+   EXPECT_EQ( v.getSize(), u.getSize() );
+   EXPECT_NE( v.getData(), u.getData() );
+TYPED_TEST( ArrayTest, bind )
+   using ArrayType = typename TestFixture::ArrayType;
    ArrayType u( 10 ), v;
-   u.setValue( 27 );
    v.bind( u );
-   ASSERT_EQ( v.getSize(), u.getSize() );
-   ASSERT_EQ( u.getElement( 0 ), 27 );
-   v.setValue( 50 );
-   ASSERT_EQ( u.getElement( 0 ), 50 );
-   u.reset();
-   ASSERT_EQ( u.getSize(), 0 );
-   ASSERT_EQ( v.getElement( 0 ), 50 );
+   EXPECT_EQ( v.getSize(), u.getSize() );
+   EXPECT_EQ( v.getData(), u.getData() );
-   ElementType data[ 10 ] = { 1, 2, 3, 4, 5, 6, 7, 8, 10 };
-   u.bind( data, 10 );
-   ASSERT_EQ( u.getElement( 1 ), 2 );
-   v.bind( u );
-   ASSERT_EQ( v.getElement( 1 ), 2 );
+   // bind array with offset and size
+   ArrayType w;
+   w.bind( u, 2, 3 );
+   EXPECT_EQ( w.getSize(), 3 );
+   EXPECT_EQ( w.getData(), u.getData() + 2 );
+   // setting values
+   u.setValue( 27 );
+   EXPECT_EQ( u.getElement( 0 ), 27 );
+   v.setValue( 50 );
+   EXPECT_EQ( u.getElement( 0 ), 50 );
-   v.setElement( 1, 3 );
-   v.reset();
-   ASSERT_EQ( data[ 1 ], 3 );
+   EXPECT_EQ( u.getSize(), 0 );
+   EXPECT_EQ( v.getElement( 0 ), 50 );
+   if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
+      typename ArrayType::ElementType data[ 10 ] = { 1, 2, 3, 4, 5, 6, 7, 8, 10 };
+      u.bind( data, 10 );
+      EXPECT_EQ( u.getData(), data );
+      EXPECT_EQ( u.getSize(), 10 );
+      EXPECT_EQ( u.getElement( 1 ), 2 );
+      v.bind( u );
+      EXPECT_EQ( v.getElement( 1 ), 2 );
+      u.reset();
+      v.setElement( 1, 3 );
+      v.reset();
+      EXPECT_EQ( data[ 1 ], 3 );
+   }
-#endif  /* GTEST_HAS_TYPED_TEST */
+TYPED_TEST( ArrayTest, swap )
+   using ArrayType = typename TestFixture::ArrayType;
-typedef int ElementType;
-typedef Devices::Host Device;
-typedef int IndexType;
+   ArrayType u( 10 ), v( 20 );
+   u.setValue( 0 );
+   v.setValue( 1 );
+   u.swap( v );
+   EXPECT_EQ( u.getSize(), 20 );
+   EXPECT_EQ( v.getSize(), 10 );
+   for( int i = 0; i < 20; i++ )
+      EXPECT_EQ( u.getElement( i ), 1 );
+   for( int i = 0; i < 10; i++ )
+      EXPECT_EQ( v.getElement( i ), 0 );
-TEST( ArrayTest, testSetGetElement )
+TYPED_TEST( ArrayTest, reset )
-   using namespace TNL::Containers;
-   Array< ElementType, Device, IndexType > u;
-   u. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      u. setElement( i, i );
-   for( int i = 0; i < 10; i ++ )
-      ASSERT_EQ( u. getElement( i ), i );
+   using ArrayType = typename TestFixture::ArrayType;
-   u.setValue( 0 );
-   if( std::is_same< Device, Devices::Host >::value )
-   {
-      for( int i = 0; i < 10; i ++ )
-         u[ i ] =  i;
+   ArrayType u;
+   u.setSize( 100 );
+   EXPECT_EQ( u.getSize(), 100 );
+   EXPECT_NE( u.getData(), nullptr );
+   u.reset();
+   EXPECT_EQ( u.getSize(), 0 );
+   EXPECT_EQ( u.getData(), nullptr );
+   u.setSize( 100 );
+   EXPECT_EQ( u.getSize(), 100 );
+   EXPECT_NE( u.getData(), nullptr );
+   u.reset();
+   EXPECT_EQ( u.getSize(), 0 );
+   EXPECT_EQ( u.getData(), nullptr );
+template< typename Element, typename Index >
+void testArrayElementwiseAccess( Array< Element, Devices::Host, Index >&& u )
+   u.setSize( 10 );
+   for( int i = 0; i < 10; i++ ) {
+      u.setElement( i, i );
+      EXPECT_EQ( u.getData()[ i ], i );
+      EXPECT_EQ( u.getElement( i ), i );
+      EXPECT_EQ( u[ i ], i );
-   if( std::is_same< Device, Devices::Cuda >::value )
-   {
 #ifdef HAVE_CUDA
-      Array< ElementType, Device, IndexType >* kernel_u =
-               Devices::Cuda::passToDevice( u );
-      testSetGetElementKernel<<< 1, 16 >>>( kernel_u );
-      Devices::Cuda::freeFromDevice( kernel_u );
-      ASSERT_TRUE( checkCudaDevice );
+template< typename ElementType, typename IndexType >
+__global__ void testSetGetElementKernel( Array< ElementType, Devices::Cuda, IndexType >* u )
+   if( threadIdx.x < ( *u ).getSize() )
+      ( *u )[ threadIdx.x ] = threadIdx.x;
+#endif /* HAVE_CUDA */
+template< typename Element, typename Index >
+void testArrayElementwiseAccess( Array< Element, Devices::Cuda, Index >&& u )
+#ifdef HAVE_CUDA
+   u.setSize( 10 );
+   using ArrayType = Array< Element, Devices::Cuda, Index >;
+   ArrayType* kernel_u = Devices::Cuda::passToDevice( u );
+   testSetGetElementKernel<<< 1, 16 >>>( kernel_u );
+   Devices::Cuda::freeFromDevice( kernel_u );
+   for( int i = 0; i < 10; i++ ) {
+      EXPECT_EQ( u.getElement( i ), i );
-   for( int i = 0; i < 10; i++ )
-      ASSERT_EQ( u.getElement( i ), i );
-TEST( ArrayTest, testComparisonOperator )
+template< typename Element, typename Index >
+void testArrayElementwiseAccess( Array< Element, Devices::MIC, Index >&& u )
-    using namespace TNL::Containers;
-   Array< ElementType, Device, IndexType > u;
-   Array< ElementType, Device, IndexType > v;
-   Array< ElementType, Device, IndexType > w;
-   u. setSize( 10 );
-   v. setSize( 10 );
-   w. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      u. setElement( i, i );
-      v. setElement( i, i );
-      w. setElement( i, 2*1 );
+#ifdef HAVE_MIC
+   // TODO
+TYPED_TEST( ArrayTest, elementwiseAccess )
+   using ArrayType = typename TestFixture::ArrayType;
+   testArrayElementwiseAccess( ArrayType() );
+// TODO: comparison with different device
+TYPED_TEST( ArrayTest, comparisonOperator )
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u( 10 ), v( 10 ), w( 10 );
+   for( int i = 0; i < 10; i ++ ) {
+      u.setElement( i, i );
+      v.setElement( i, i );
+      w.setElement( i, 2 * i );
-   ASSERT_TRUE( u == v );
-   ASSERT_FALSE( u != v );
-   ASSERT_TRUE( u != w );
-   ASSERT_FALSE( u == w );
+   EXPECT_TRUE( u == u );
+   EXPECT_TRUE( u == v );
+   EXPECT_TRUE( v == u );
+   EXPECT_FALSE( u != v );
+   EXPECT_FALSE( v != u );
+   EXPECT_TRUE( u != w );
+   EXPECT_TRUE( w != u );
+   EXPECT_FALSE( u == w );
+   EXPECT_FALSE( w == u );
+   v.setSize( 0 );
+   EXPECT_FALSE( u == v );
+   u.setSize( 0 );
+   EXPECT_TRUE( u == v );
-TEST( ArrayTest, testAssignmentOperator )
+// TODO: comparison with different device
+// TODO: missing implementation of relevant reduction operation on CUDA with different types
+TYPED_TEST( ArrayTest, comparisonOperatorWithDifferentType )
-   using namespace TNL::Containers;
-   Array< ElementType, Device, IndexType > u;
-   Array< ElementType, Device, IndexType > v;
-   u. setSize( 10 );
-   v. setSize( 10 );
+   Array< short, typename ArrayType::DeviceType, short > z( 10 );
    for( int i = 0; i < 10; i ++ )
-      u. setElement( i, i );
-   v = u;
-   ASSERT_TRUE( u == v );
-   ASSERT_TRUE( v == u );
-   ASSERT_FALSE( u != v );
-   ASSERT_FALSE( v != u );
-   v.setValue( 0 );
-   Array< ElementType, Devices::Host, IndexType > w;
-   w.setSize( 10 );
-   w = u;
+      z.setElement( i, i );
+   EXPECT_TRUE( u == z );
+   EXPECT_FALSE( u != z );
+   for( int i = 0; i < 10; i ++ )
+      z.setElement( i, 2 * i );
+   EXPECT_FALSE( u == z );
+   EXPECT_TRUE( u != z );
-   ASSERT_TRUE( u == w );
-   ASSERT_FALSE( u != w );
+// TODO: assignment from different device
+TYPED_TEST( ArrayTest, assignmentOperator )
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u( 10 ), v( 10 );
+   for( int i = 0; i < 10; i++ )
+      u.setElement( i, i );
    v.setValue( 0 );
-   v = w;
-   ASSERT_TRUE( v == w );
-   ASSERT_FALSE( v != w );
+   v = u;
+   EXPECT_EQ( u, v );
-TEST( ArrayTest, testGetSize )
+// test works only for arithmetic types
+template< typename ArrayType,
+          typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ElementType >::value >::type >
+void testArrayAssignmentWithDifferentType()
-   using namespace TNL::Containers;
-   Array< ElementType, Device, IndexType > u;
-   const int maxSize = 10;
-   for( int i = 0; i < maxSize; i ++ )
-      u. setSize( i );
+   ArrayType u( 10 );
+   for( int i = 0; i < 10; i++ )
+      u.setElement( i, i );
+   Array< short, typename ArrayType::DeviceType, short > v( 10 );
+   v.setValue( 0 );
+   v = u;
+// TODO: missing implementation of relevant reduction operation on CUDA with different types
+//   EXPECT_EQ( u, v );
+   for( int i = 0; i < 10; i++ )
+      EXPECT_EQ( v.getElement( i ), i );
-   ASSERT_EQ( u. getSize(), maxSize - 1 );
+template< typename ArrayType,
+          typename = typename std::enable_if< ! std::is_arithmetic< typename ArrayType::ElementType >::value >::type,
+          typename = void >
+void testArrayAssignmentWithDifferentType()
-TEST( ArrayTest, testReset )
+// TODO: assignment from different device
+TYPED_TEST( ArrayTest, assignmentOperatorWithDifferentType )
-   using namespace TNL::Containers;
-   Array< ElementType, Device, IndexType > u;
-   u. setSize( 100 );
-   ASSERT_EQ( u. getSize(), 100 );
-   u. reset();
-   ASSERT_EQ( u. getSize(), 0 );
-   u. setSize( 100 );
-   ASSERT_EQ( u. getSize(), 100 );
-   u. reset();
-   ASSERT_EQ( u. getSize(), 0 );
+   using ArrayType = typename TestFixture::ArrayType;
+   testArrayAssignmentWithDifferentType< ArrayType >();
-TEST( ArrayTest, testSetSizeAndDestructor )
+TYPED_TEST( ArrayTest, SaveAndLoad )
-   using namespace TNL::Containers;
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u, v;
+   v.setSize( 100 );
    for( int i = 0; i < 100; i ++ )
-   {
-      Array< ElementType, Device, IndexType > u;
-      u. setSize( i );
-   }
+      v.setElement( i, 3.14147 );
+   File file;
+   file.open( "test-file.tnl", IOMode::write );
+   EXPECT_TRUE( v.save( file ) );
+   file.close();
+   file.open( "test-file.tnl", IOMode::read );
+   EXPECT_TRUE( u.load( file ) );
+   EXPECT_EQ( u, v );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
-TEST( ArrayTest, testSaveAndLoad )
+TYPED_TEST( ArrayTest, boundLoad )
-   using namespace TNL::Containers;
-   Array< ElementType, Device, IndexType > v;
-   v. setSize( 100 );
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u, v, w;
+   v.setSize( 100 );
    for( int i = 0; i < 100; i ++ )
-      v. setElement( i, 3.14147 );
+      v.setElement( i, 3.14147 );
    File file;
-   file. open( "test-file.tnl", tnlWriteMode );
-   v. save( file );
-   file. close();
-   Array< ElementType, Device, IndexType > u;
-   file. open( "test-file.tnl", tnlReadMode );
-   u. load( file );
-   file. close();
-   ASSERT_TRUE( u == v );
+   file.open( "test-file.tnl", IOMode::write );
+   EXPECT_TRUE( v.save( file ) );
+   file.close();
+   w.setSize( 100 );
+   u.bind( w );
+   file.open( "test-file.tnl", IOMode::read );
+   EXPECT_TRUE( u.boundLoad( file ) );
+   EXPECT_EQ( u, v );
+   EXPECT_EQ( u.getData(), w.getData() );
+   u.setSize( 50 );
+   file.open( "test-file.tnl", IOMode::read );
+   EXPECT_FALSE( u.boundLoad( file ) );
+   u.reset();
+   file.open( "test-file.tnl", IOMode::read );
+   EXPECT_TRUE( u.boundLoad( file ) );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
-TEST( ArrayTest, testUnusualStructures )
+TYPED_TEST( ArrayTest, referenceCountingConstructors )
-   using namespace TNL::Containers;
-   Array< testingClassForArrayTester >u;
+   using ArrayType = typename TestFixture::ArrayType;
+   // copies of a dynamic array
+   ArrayType u( 10 );
+   ArrayType v( u );
+   ArrayType w( v );
+   EXPECT_EQ( v.getData(), u.getData() );
+   EXPECT_EQ( w.getData(), u.getData() );
+   // copies of a static array
+   if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
+      typename ArrayType::ElementType data[ 10 ] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+      ArrayType u( data, 10 );
+      ArrayType v( u );
+      ArrayType w( v );
+      EXPECT_EQ( u.getData(), data );
+      EXPECT_EQ( v.getData(), data );
+      EXPECT_EQ( w.getData(), data );
+   }
-#endif /* HAVE_GTEST */
-#ifdef HAVE_CUDA
-template< typename ElementType, typename IndexType >
-__global__ void testSetGetElementKernel( Array< ElementType, Devices::Cuda, IndexType >* u )
+TYPED_TEST( ArrayTest, referenceCountingBind )
-   if( threadIdx.x < ( *u ).getSize() )
-      ( *u )[ threadIdx.x ] = threadIdx.x;
+   using ArrayType = typename TestFixture::ArrayType;
+   // copies of a dynamic array
+   ArrayType u( 10 );
+   ArrayType v;
+   v.bind( u );
+   ArrayType w;
+   w.bind( v );
+   EXPECT_EQ( v.getData(), u.getData() );
+   EXPECT_EQ( w.getData(), u.getData() );
+   // copies of a static array
+   if( std::is_same< typename ArrayType::DeviceType, Devices::Host >::value ) {
+      typename ArrayType::ElementType data[ 10 ] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+      ArrayType u( data, 10 );
+      ArrayType v;
+      v.bind( u );
+      ArrayType w;
+      w.bind( v );
+      EXPECT_EQ( u.getData(), data );
+      EXPECT_EQ( v.getData(), data );
+      EXPECT_EQ( w.getData(), data );
+   }
-#endif /* HAVE_CUDA */
+// TODO: test all __cuda_callable__ methods from a CUDA kernel
+#endif // HAVE_GTEST
+#include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
old mode 100755
new mode 100644
index 3f6b1c4c2d8227734baad4de62a3893c15cbce73..3aa747857a93471dfb06375d746689d1dab9b25e
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -1,67 +1,83 @@
-   CUDA_ADD_EXECUTABLE( ArrayOperationsTest${mpiExt}${debugExt} ArrayOperationsTest.h ArrayOperationsTest.cu )
-   TARGET_LINK_LIBRARIES( ArrayOperationsTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE( ArrayOperationsTest${mpiExt}${debugExt} ArrayOperationsTest.h ArrayOperationsTest.cpp )
-   TARGET_LINK_LIBRARIES( ArrayOperationsTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_EXECUTABLE( ListTest${mpiExt}${debugExt} ListTest.cpp )
+TARGET_LINK_LIBRARIES( ListTest${mpiExt}${debugExt}
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-   CUDA_ADD_EXECUTABLE( ArrayTest${mpiExt}${debugExt} ArrayTest.h ArrayTest.cu )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE( ArrayTest${mpiExt}${debugExt} ArrayTest.h ArrayTest.cpp )
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+   CUDA_ADD_EXECUTABLE( ArrayOperationsTest${mpiExt}${debugExt} ArrayOperationsTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ArrayOperationsTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
+   ADD_EXECUTABLE( ArrayOperationsTest${mpiExt}${debugExt} ArrayOperationsTest.cpp )
+   TARGET_COMPILE_OPTIONS( ArrayOperationsTest${mpiExt}${debugExt} PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ArrayOperationsTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
-   CUDA_ADD_EXECUTABLE( MultiArrayTest${mpiExt}${debugExt} MultiArrayTest.h MultiArrayTest.cu )
-   TARGET_LINK_LIBRARIES( MultiArrayTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE( StaticArrayTest${mpiExt}${debugExt} StaticArrayTest.h StaticArrayTest.cpp )
-   TARGET_LINK_LIBRARIES( StaticArrayTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+   CUDA_ADD_EXECUTABLE( ArrayTest${mpiExt}${debugExt} ArrayTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( ArrayTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
+   ADD_EXECUTABLE( ArrayTest${mpiExt}${debugExt} ArrayTest.cpp )
+   TARGET_LINK_LIBRARIES( ArrayTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
-   CUDA_ADD_EXECUTABLE( VectorOperationsTest${mpiExt}${debugExt} VectorOperationsTest.h VectorOperationsTest.cu )
-   TARGET_LINK_LIBRARIES( VectorOperationsTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE( VectorOperationsTest${mpiExt}${debugExt} VectorOperationsTest.h VectorOperationsTest.cpp )
-   TARGET_LINK_LIBRARIES( VectorOperationsTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_EXECUTABLE( StaticArrayTest${mpiExt}${debugExt} StaticArrayTest.cpp )
+TARGET_LINK_LIBRARIES( StaticArrayTest${mpiExt}${debugExt}
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+# NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time
-   CUDA_ADD_EXECUTABLE( StaticVectorTest${mpiExt}${debugExt} StaticVectorTest.h StaticVectorTest.cu )
-   TARGET_LINK_LIBRARIES( StaticVectorTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
-ELSE(  BUILD_CUDA )               
-   ADD_EXECUTABLE( StaticVectorTest${mpiExt}${debugExt} StaticVectorTest.h StaticVectorTest.cpp )
-   TARGET_LINK_LIBRARIES( StaticVectorTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
-                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+   CUDA_ADD_EXECUTABLE( VectorTest${mpiExt}${debugExt} VectorTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( VectorTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
+   ADD_EXECUTABLE( VectorTest${mpiExt}${debugExt} VectorTest.cpp )
+   TARGET_LINK_LIBRARIES( VectorTest${mpiExt}${debugExt}
+                              ${GTEST_BOTH_LIBRARIES}
+                              tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_EXECUTABLE( StaticVectorTest${mpiExt}${debugExt} StaticVectorTest.cpp )
+TARGET_COMPILE_OPTIONS( StaticVectorTest${mpiExt}${debugExt} PRIVATE ${CXX_TESTS_FLAGS} )
+TARGET_LINK_LIBRARIES( StaticVectorTest${mpiExt}${debugExt}
+                           ${GTEST_BOTH_LIBRARIES}
+                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+#   CUDA_ADD_EXECUTABLE( MultiArrayTest${mpiExt}${debugExt} MultiArrayTest.cu
+#                        OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MultiArrayTest${mpiExt}${debugExt}
+#                              ${GTEST_BOTH_LIBRARIES}
+#                              tnl${mpiExt}${debugExt}-${tnlVersion} )
+#   ADD_EXECUTABLE( MultiArrayTest${mpiExt}${debugExt} MultiArrayTest.cpp )
+#   TARGET_COMPILE_OPTIONS( MultiArrayTest${mpiExt}${debugExt} PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MultiArrayTest${mpiExt}${debugExt}
+#                              ${GTEST_BOTH_LIBRARIES}
+#                              tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_TEST( ListTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/ListTest${mpiExt}${debugExt} )
 ADD_TEST( ArrayOperationsTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/ArrayOperationsTest${mpiExt}${debugExt} )
 ADD_TEST( ArrayTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${mpiExt}${debugExt} )
-ADD_TEST( MultiArrayTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/MultiArrayTest${mpiExt}${debugExt} )
 ADD_TEST( StaticArrayTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${mpiExt}${debugExt} )
-ADD_TEST( VectorOperationsTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/VectorOperationsTest${mpiExt}${debugExt} )
-ADD_TEST( StaticVectorTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${mpiExt}${debugExt} )
\ No newline at end of file
+ADD_TEST( VectorTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/VectorTest${mpiExt}${debugExt} )
+ADD_TEST( StaticVectorTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${mpiExt}${debugExt} )
+#ADD_TEST( MultiArrayTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/MultiArrayTest${mpiExt}${debugExt} )
diff --git a/src/UnitTests/Containers/ListTest.cpp b/src/UnitTests/Containers/ListTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ec81c1f0bf8f3d25be8d0daf98e2625cdc65ab69
--- /dev/null
+++ b/src/UnitTests/Containers/ListTest.cpp
@@ -0,0 +1,125 @@
+                          ListTest.cpp  -  description
+                             -------------------
+    begin                : Feb 15, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+#include <TNL/Containers/List.h>
+using namespace TNL;
+using namespace TNL::Containers;
+// test fixture for typed tests
+template< typename List >
+class ListTest : public ::testing::Test
+   using ListType = List;
+// types for which ListTest is instantiated
+using ListTypes = ::testing::Types<
+   List< short  >,
+   List< int    >,
+   List< long   >,
+   List< float  >,
+   List< double >,
+   List< String >
+TYPED_TEST_CASE( ListTest, ListTypes );
+TYPED_TEST( ListTest, constructor )
+   using ListType = typename TestFixture::ListType;
+   ListType list;
+   EXPECT_TRUE( list.isEmpty() );
+   EXPECT_EQ( list.getSize(), 0 );
+   list.Append( 0 );
+   EXPECT_EQ( list.getSize(), 1 );
+   ListType copy( list );
+   list.Append( 0 );
+   EXPECT_EQ( list.getSize(), 2 );
+   EXPECT_EQ( copy.getSize(), 1 );
+   EXPECT_EQ( copy[ 0 ], list[ 0 ] );
+TYPED_TEST( ListTest, operations )
+   using ListType = typename TestFixture::ListType;
+   using ElementType = typename ListType::ElementType;
+   ListType a, b;
+   a.Append( 0 );
+   a.Append( 1 );
+   a.Prepend( 2 );
+   a.Insert( 3, 1 );
+   EXPECT_EQ( a.getSize(), 4 );
+   EXPECT_EQ( a[ 0 ], (ElementType) 2 );
+   EXPECT_EQ( a[ 1 ], (ElementType) 3 );
+   EXPECT_EQ( a[ 2 ], (ElementType) 0 );
+   EXPECT_EQ( a[ 3 ], (ElementType) 1 );
+   b = a;
+   EXPECT_EQ( b.getSize(), 4 );
+   EXPECT_EQ( a, b );
+   b.Insert( 4, 4 );
+   EXPECT_NE( a, b );
+   EXPECT_EQ( b[ 4 ], (ElementType) 4 );
+   a.AppendList( b );
+   EXPECT_EQ( a.getSize(), 9 );
+   EXPECT_EQ( a[ 0 ], (ElementType) 2 );
+   EXPECT_EQ( a[ 1 ], (ElementType) 3 );
+   EXPECT_EQ( a[ 2 ], (ElementType) 0 );
+   EXPECT_EQ( a[ 3 ], (ElementType) 1 );
+   EXPECT_EQ( a[ 4 ], (ElementType) 2 );
+   EXPECT_EQ( a[ 5 ], (ElementType) 3 );
+   EXPECT_EQ( a[ 6 ], (ElementType) 0 );
+   EXPECT_EQ( a[ 7 ], (ElementType) 1 );
+   EXPECT_EQ( a[ 8 ], (ElementType) 4 );
+   a.PrependList( b );
+   EXPECT_EQ( a.getSize(), 14 );
+   EXPECT_EQ( a[ 0 ],  (ElementType) 2 );
+   EXPECT_EQ( a[ 1 ],  (ElementType) 3 );
+   EXPECT_EQ( a[ 2 ],  (ElementType) 0 );
+   EXPECT_EQ( a[ 3 ],  (ElementType) 1 );
+   EXPECT_EQ( a[ 4 ],  (ElementType) 4 );
+   EXPECT_EQ( a[ 5 ],  (ElementType) 2 );
+   EXPECT_EQ( a[ 6 ],  (ElementType) 3 );
+   EXPECT_EQ( a[ 7 ],  (ElementType) 0 );
+   EXPECT_EQ( a[ 8 ],  (ElementType) 1 );
+   EXPECT_EQ( a[ 9 ],  (ElementType) 2 );
+   EXPECT_EQ( a[ 10 ], (ElementType) 3 );
+   EXPECT_EQ( a[ 11 ], (ElementType) 0 );
+   EXPECT_EQ( a[ 12 ], (ElementType) 1 );
+   EXPECT_EQ( a[ 13 ], (ElementType) 4 );
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+   throw GtestMissingError();
diff --git a/src/UnitTests/Containers/MultiArrayTest.h b/src/UnitTests/Containers/MultiArrayTest.h
index a14fb919e4cbb34b7aaec0f1cb219280f803b06b..c1a506c26a2d451de59819c70cc3faf7c09552ab 100644
--- a/src/UnitTests/Containers/MultiArrayTest.h
+++ b/src/UnitTests/Containers/MultiArrayTest.h
@@ -119,7 +119,7 @@ TEST( MultiArrayTest, testSetGetElement )
                Devices::Cuda::passToDevice( u );
       testSetGetElementKernel<<< 1, 16 >>>( kernel_u );
       Devices::Cuda::freeFromDevice( kernel_u );
-      ASSERT_TRUE( checkCudaDevice );
    for( int i = 0; i < 10; i ++ )
@@ -207,14 +207,16 @@ TEST( MultiArrayTest, testSaveAndLoad )
    for( int i = 0; i < size; i ++ )
       setDiagonalElement( v, i, 3.14147 );
    File file;
-   file. open( "test-file.tnl", tnlWriteMode );
+   file. open( "test-file.tnl", IOMode::write );
    ASSERT_TRUE( v. save( file ) );
    file. close();
    MultiArray< Dimension, ElementType, Device, IndexType > u;
-   file. open( "test-file.tnl", tnlReadMode );
+   file. open( "test-file.tnl", IOMode::read );
    ASSERT_TRUE( u. load( file ) );
    file. close();
    ASSERT_TRUE( u == v );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
 #endif /* HAVE_GTEST */
diff --git a/src/UnitTests/Containers/StaticArrayTest.cpp b/src/UnitTests/Containers/StaticArrayTest.cpp
index afb3017f0909920565046725a8b38a332117bc21..d6dedf6236caf347dba2c458e9ef992cfdec5565 100644
--- a/src/UnitTests/Containers/StaticArrayTest.cpp
+++ b/src/UnitTests/Containers/StaticArrayTest.cpp
@@ -8,4 +8,304 @@
 /* See Copyright Notice in tnl/Copyright */
-#include "StaticArrayTest.h"
+#ifdef HAVE_GTEST
+#include <TNL/Containers/StaticArray.h>
+#include <TNL/Containers/Array.h>
+#include "gtest/gtest.h"
+using namespace TNL;
+using namespace TNL::Containers;
+// test fixture for typed tests
+template< typename Array >
+class StaticArrayTest : public ::testing::Test
+   using ArrayType = Array;
+   using ElementType = typename Array::ElementType;
+// types for which ArrayTest is instantiated
+using StaticArrayTypes = ::testing::Types<
+   StaticArray< 1, short >,
+   StaticArray< 2, short >,
+   StaticArray< 3, short >,
+   StaticArray< 4, short >,
+   StaticArray< 5, short >,
+   StaticArray< 1, int >,
+   StaticArray< 2, int >,
+   StaticArray< 3, int >,
+   StaticArray< 4, int >,
+   StaticArray< 5, int >,
+   StaticArray< 1, long >,
+   StaticArray< 2, long >,
+   StaticArray< 3, long >,
+   StaticArray< 4, long >,
+   StaticArray< 5, long >,
+   StaticArray< 1, float >,
+   StaticArray< 2, float >,
+   StaticArray< 3, float >,
+   StaticArray< 4, float >,
+   StaticArray< 5, float >,
+   StaticArray< 1, double >,
+   StaticArray< 2, double >,
+   StaticArray< 3, double >,
+   StaticArray< 4, double >,
+   StaticArray< 5, double >
+TYPED_TEST_CASE( StaticArrayTest, StaticArrayTypes );
+TYPED_TEST( StaticArrayTest, constructors )
+   using ArrayType = typename TestFixture::ArrayType;
+   using ElementType = typename TestFixture::ElementType;
+   constexpr int Size = ArrayType::size;
+   ElementType data[ Size ];
+   for( int i = 0; i < Size; i++ )
+      data[ i ] = i;
+   ArrayType u0;
+   EXPECT_TRUE( u0.getData() );
+   ArrayType u1( data );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u1[ i ], data[ i ] );
+   ArrayType u2( 7 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u2[ i ], 7 );
+   ArrayType u3( u1 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u3[ i ], u1[ i ] );
+   // initialization with 0 requires special treatment to avoid ambiguity,
+   // see https://stackoverflow.com/q/4610503
+   ArrayType v( 0 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( v[ i ], 0 );
+TYPED_TEST( StaticArrayTest, getSize )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   ArrayType u;
+   EXPECT_EQ( u.getSize(), Size );
+TYPED_TEST( StaticArrayTest, getData )
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u1;
+   EXPECT_TRUE( u1.getData() );
+   const ArrayType u2;
+   EXPECT_TRUE( u2.getData() );
+template< typename Element >
+void checkCoordinates( StaticArray< 1, Element >& u )
+   EXPECT_EQ( u.x(), 0 );
+   u.x() += 1;
+   EXPECT_EQ( u.x(), 1 );
+template< typename Element >
+void checkCoordinates( StaticArray< 2, Element >& u )
+   EXPECT_EQ( u.x(), 0 );
+   EXPECT_EQ( u.y(), 1 );
+   u.x() += 1;
+   u.y() += 1;
+   EXPECT_EQ( u.x(), 1 );
+   EXPECT_EQ( u.y(), 2 );
+template< typename Element >
+void checkCoordinates( StaticArray< 3, Element >& u )
+   EXPECT_EQ( u.x(), 0 );
+   EXPECT_EQ( u.y(), 1 );
+   EXPECT_EQ( u.z(), 2 );
+   u.x() += 1;
+   u.y() += 1;
+   u.z() += 1;
+   EXPECT_EQ( u.x(), 1 );
+   EXPECT_EQ( u.y(), 2 );
+   EXPECT_EQ( u.z(), 3 );
+template< int _Size, typename Element >
+void checkCoordinates( StaticArray< _Size, Element >& u )
+TYPED_TEST( StaticArrayTest, CoordinatesGetter )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   ArrayType u;
+   for( int i = 0; i < Size; i++ )
+      u[ i ] = i;
+   checkCoordinates( u );
+TYPED_TEST( StaticArrayTest, ComparisonOperator )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   ArrayType u1, u2, u3;
+   for( int i = 0; i < Size; i++ ) {
+      u1[ i ] = 1;
+      u2[ i ] = i;
+      u3[ i ] = i;
+   }
+   EXPECT_TRUE( u1 == u1 );
+   EXPECT_TRUE( u1 != u2 );
+   EXPECT_TRUE( u2 == u3 );
+   // comparison with different type
+   StaticArray< Size, char > u4( 1 );
+   EXPECT_TRUE( u1 == u4 );
+   EXPECT_TRUE( u2 != u4 );
+   EXPECT_TRUE( u3 != u4 );
+   for( int i = 0; i < Size; i++ )
+      u4[ i ] = i;
+   EXPECT_TRUE( u1 != u4 );
+   EXPECT_TRUE( u2 == u4 );
+   EXPECT_TRUE( u3 == u4 );
+TYPED_TEST( StaticArrayTest, AssignmentOperator )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   ArrayType u1, u2, u3;
+   for( int i = 0; i < Size; i++ )
+   {
+      u1[ i ] = 1;
+      u2[ i ] = i;
+   }
+   u3 = u1;
+   EXPECT_TRUE( u3 == u1 );
+   EXPECT_TRUE( u3 != u2 );
+   u3 = u2;
+   EXPECT_TRUE( u3 == u2 );
+   EXPECT_TRUE( u3 != u1 );
+   // assignment from different type
+   StaticArray< Size, char > u4( 128 );
+   u3 = u4;
+   EXPECT_TRUE( u3 == u4 );
+TYPED_TEST( StaticArrayTest, setValue )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   ArrayType u;
+   u.setValue( 42 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u[ i ], 42 );
+TYPED_TEST( StaticArrayTest, CastToDifferentStaticArray )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   using OtherArray = StaticArray< Size, char >;
+   ArrayType u1( 1 );
+   OtherArray u2( 1 );
+   EXPECT_EQ( (OtherArray) u1, u2 );
+   EXPECT_EQ( u1, (ArrayType) u2 );
+TYPED_TEST( StaticArrayTest, SaveAndLoad )
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u1( 7 ), u2;
+   File file;
+   file.open( "tnl-static-array-test.tnl", IOMode::write );
+   u1.save( file );
+   file.close();
+   file.open( "tnl-static-array-test.tnl", IOMode::read );
+   u2.load( file );
+   file.close();
+   EXPECT_EQ( u1, u2 );
+   EXPECT_EQ( std::remove( "tnl-static-array-test.tnl" ), 0 );
+TYPED_TEST( StaticArrayTest, sort )
+   using ArrayType = typename TestFixture::ArrayType;
+   constexpr int Size = ArrayType::size;
+   ArrayType u;
+   for( int i = 0; i < Size; i++ )
+      u[ i ] = Size - i - 1;
+   u.sort();
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u[ i ], i );
+TYPED_TEST( StaticArrayTest, streamOperator )
+   using ArrayType = typename TestFixture::ArrayType;
+   ArrayType u;
+   std::stringstream testStream;
+   testStream << u;
+TYPED_TEST( StaticArrayTest, BindToArray )
+   using ArrayType = typename TestFixture::ArrayType;
+   using ElementType = typename TestFixture::ElementType;
+   constexpr int Size = ArrayType::size;
+   ArrayType a;
+   for( int i = 0; i < Size; i++ )
+      a[ i ] = i+1;
+   Array< ElementType, Devices::Host > sharedArray;
+   sharedArray.bind( a );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( a[ i ], sharedArray[ i ] );
+#endif // HAVE_GTEST
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+   throw GtestMissingError();
diff --git a/src/UnitTests/Containers/StaticArrayTest.h b/src/UnitTests/Containers/StaticArrayTest.h
deleted file mode 100644
index b26af6f60673f037c4cea63d3a6d1eddc9b06c42..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/StaticArrayTest.h
+++ /dev/null
@@ -1,188 +0,0 @@
-                          StaticArrayTester.h  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#pragma once
-#include <TNL/Containers/StaticArray.h>
-#include <TNL/Containers/Array.h>
-#ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
-using namespace TNL;
-using namespace TNL::Containers;
-class testingClassForStaticArrayTester
-   public:
-      static String getType()
-      {
-         return String( "testingClassForStaticArrayTester" );
-      };
-String getType( const testingClassForStaticArrayTester& c )
-   return String( "testingClassForStaticArrayTester" );
-#ifdef HAVE_GTEST
-typedef int ElementType;
-const int Size( 16 );
-TEST( StaticArrayTest, testConstructors )
-   ElementType data[ Size ];
-   for( int i = 0; i < Size; i++ )
-      data[ i ] = i;
-   StaticArray< Size, ElementType > u1( data );
-   for( int i = 0; i < Size; i++ )
-      ASSERT_EQ( u1[ i ], data[ i ] );
-   StaticArray< Size, ElementType > u2( 7 );
-   for( int i = 0; i < Size; i++ )
-      ASSERT_EQ( u2[ i ], 7 );
-   StaticArray< Size, ElementType > u3( u1 );
-   for( int i = 0; i < Size; i++ )
-      ASSERT_EQ( u3[ i ], u1[ i ] );
-template< typename Element >
-void checkCoordinates( const StaticArray< 1, Element >& u )
-   ASSERT_EQ( u.x(), 0 );
-template< typename Element >
-void checkCoordinates( const StaticArray< 2, Element >& u )
-   ASSERT_EQ( u.x(), 0 );
-   ASSERT_EQ( u.y(), 1 );
-template< typename Element >
-void checkCoordinates( const StaticArray< 3, Element >& u )
-   ASSERT_EQ( u.x(), 0 );
-   ASSERT_EQ( u.y(), 1 );
-   ASSERT_EQ( u.z(), 2 );
-template< int _Size, typename Element >
-void checkCoordinates( const StaticArray< _Size, Element >& u )
-TEST( StaticArrayTest, testCoordinatesGetter )
-   StaticArray< Size, ElementType > u;
-   for( int i = 0; i < Size; i++ )
-      u[ i ] = i;
-   checkCoordinates( u );
-TEST( StaticArrayTest, testComparisonOperator )
-   StaticArray< Size, ElementType > u1, u2, u3;
-   for( int i = 0; i < Size; i++ )
-   {
-      u1[ i ] = 1;
-      u2[ i ] = i;
-      u3[ i ] = i;
-   }
-   ASSERT_TRUE( u1 == u1 );
-   ASSERT_TRUE( u1 != u2 );
-   ASSERT_TRUE( u2 == u3 );
-TEST( StaticArrayTest, testAssignmentOperator )
-   StaticArray< Size, ElementType > u1, u2, u3;
-   for( int i = 0; i < Size; i++ )
-   {
-      u1[ i ] = 1;
-      u2[ i ] = i;
-   }
-   u3 = u1;
-   ASSERT_TRUE( u3 == u1 );
-   ASSERT_TRUE( u3 != u2 );
-   u3 = u2;
-   ASSERT_TRUE( u3 == u2 );
-   ASSERT_TRUE( u3 != u1 );
-TEST( StaticArrayTest, testLoadAndSave )
-   StaticArray< Size, ElementType > u1( 7 ), u2( 0 );
-   File file;
-   file.open( "tnl-static-array-test.tnl", tnlWriteMode );
-   u1.save( file );
-   file.close();
-   file.open( "tnl-static-array-test.tnl", tnlReadMode );
-   u2.load( file );
-   file.close();
-   ASSERT_EQ( u1, u2 );
-TEST( StaticArrayTest, testSort )
-   StaticArray< Size, ElementType > u;
-   for( int i = 0; i < Size; i++ )
-      u[ i ] = Size - i - 1;
-   u.sort();
-   for( int i = 0; i < Size; i++ )
-      ASSERT_EQ( u[ i ], i );
-TEST( StaticArrayTest, testStreamOperator )
-   StaticArray< Size, ElementType > u;
-   std::stringstream testStream;
-   testStream << u;
-TEST( StaticArrayTest, testBindToArray )
-   StaticArray< Size, ElementType > a;
-   for( int i = 0; i < Size; i++ )
-      a[ i ] = i+1;
-   Array< ElementType, Devices::Host > sharedArray;
-   sharedArray.bind( a );
-   for( int i = 0; i < Size; i++ )
-      ASSERT_EQ( a[ i ], sharedArray[ i ] );
-#endif /* HAVE_GTEST */
-int main( int argc, char* argv[] )
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
diff --git a/src/UnitTests/Containers/StaticVectorTest.cpp b/src/UnitTests/Containers/StaticVectorTest.cpp
index 3505faf2c9ea1cc85566d7f1347f1fb3a8ec2ff5..e34db3f33833dc3254e13bc0b2c650a2613abd22 100644
--- a/src/UnitTests/Containers/StaticVectorTest.cpp
+++ b/src/UnitTests/Containers/StaticVectorTest.cpp
@@ -8,4 +8,196 @@
 /* See Copyright Notice in tnl/Copyright */
-#include "StaticVectorTest.h"
+#ifdef HAVE_GTEST
+#include <TNL/Containers/StaticVector.h>
+#include "gtest/gtest.h"
+using namespace TNL;
+using namespace TNL::Containers;
+// test fixture for typed tests
+template< typename Vector >
+class StaticVectorTest : public ::testing::Test
+   using VectorType = Vector;
+   using RealType = typename VectorType::RealType;
+// types for which VectorTest is instantiated
+using StaticVectorTypes = ::testing::Types<
+   StaticVector< 1, short >,
+   StaticVector< 1, int >,
+   StaticVector< 1, long >,
+   StaticVector< 1, float >,
+   StaticVector< 1, double >,
+   StaticVector< 2, short >,
+   StaticVector< 2, int >,
+   StaticVector< 2, long >,
+   StaticVector< 2, float >,
+   StaticVector< 2, double >,
+   StaticVector< 3, short >,
+   StaticVector< 3, int >,
+   StaticVector< 3, long >,
+   StaticVector< 3, float >,
+   StaticVector< 3, double >,
+   StaticVector< 4, short >,
+   StaticVector< 4, int >,
+   StaticVector< 4, long >,
+   StaticVector< 4, float >,
+   StaticVector< 4, double >,
+   StaticVector< 5, short >,
+   StaticVector< 5, int >,
+   StaticVector< 5, long >,
+   StaticVector< 5, float >,
+   StaticVector< 5, double >
+TYPED_TEST_CASE( StaticVectorTest, StaticVectorTypes );
+TYPED_TEST( StaticVectorTest, constructors )
+   using VectorType = typename TestFixture::VectorType;
+   using RealType = typename TestFixture::RealType;
+   constexpr int Size = VectorType::size;
+   RealType data[ Size ];
+   for( int i = 0; i < Size; i++ )
+      data[ i ] = i;
+   VectorType u0;
+   EXPECT_TRUE( u0.getData() );
+   VectorType u1( data );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u1[ i ], data[ i ] );
+   VectorType u2( 7 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u2[ i ], 7 );
+   VectorType u3( u1 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( u3[ i ], u1[ i ] );
+   // initialization with 0 requires special treatment to avoid ambiguity,
+   // see https://stackoverflow.com/q/4610503
+   VectorType v( 0 );
+   for( int i = 0; i < Size; i++ )
+      EXPECT_EQ( v[ i ], 0 );
+TYPED_TEST( StaticVectorTest, operators )
+   using VectorType = typename TestFixture::VectorType;
+   constexpr int size = VectorType::size;
+   VectorType u1( 1 ), u2( 2 ), u3( 3 );
+   u1 += u2;
+   EXPECT_EQ( u1[ 0 ], 3 );
+   EXPECT_EQ( u1[ size - 1 ], 3 );
+   u1 -= u2;
+   EXPECT_EQ( u1[ 0 ], 1 );
+   EXPECT_EQ( u1[ size - 1 ], 1 );
+   u1 *= 2;
+   EXPECT_EQ( u1[ 0 ], 2 );
+   EXPECT_EQ( u1[ size - 1 ], 2 );
+   u3 = u1 + u2;
+   EXPECT_EQ( u3[ 0 ], 4 );
+   EXPECT_EQ( u3[ size - 1 ], 4 );
+   u3 = u1 - u2;
+   EXPECT_EQ( u3[ 0 ], 0 );
+   EXPECT_EQ( u3[ size - 1 ], 0 );
+   u3 = 2 * u1;
+   EXPECT_EQ( u3[ 0 ], 4 );
+   EXPECT_EQ( u3[ size - 1 ], 4 );
+   EXPECT_EQ( u1 * u2, 4 * size );
+TYPED_TEST( StaticVectorTest, comparisons )
+   using VectorType = typename TestFixture::VectorType;
+   constexpr int size = VectorType::size;
+   VectorType u1( 1 ), u2( 2 ), u3( 3 ), u4;
+   for( int i = 0; i < size; i++ )
+      u4[ i ] = i;
+   EXPECT_TRUE( u1 < u3 );
+   EXPECT_TRUE( u1 <= u3 );
+   EXPECT_TRUE( u1 < u2 );
+   EXPECT_TRUE( u1 <= u2 );
+   EXPECT_TRUE( u3 > u1 );
+   EXPECT_TRUE( u3 >= u1 );
+   EXPECT_TRUE( u2 > u1 );
+   EXPECT_TRUE( u2 >= u1 );
+   if( size > 2 ) {
+      EXPECT_FALSE( u1 < u4 );
+      EXPECT_FALSE( u1 <= u4 );
+      EXPECT_FALSE( u1 > u4 );
+      EXPECT_FALSE( u1 >= u4 );
+   }
+TYPED_TEST( StaticVectorTest, cast )
+   using VectorType = typename TestFixture::VectorType;
+   constexpr int size = VectorType::size;
+   VectorType u( 1 );
+   EXPECT_EQ( (StaticVector< size, double >) u, u );
+TYPED_TEST( StaticVectorTest, abs )
+   using VectorType = typename TestFixture::VectorType;
+   constexpr int size = VectorType::size;
+   VectorType u;
+   for( int i = 0; i < size; i++ )
+      u[ i ] = i;
+   // TODO: implement unary minus operator
+   VectorType v = - 1 * u;
+   EXPECT_EQ( v.abs(), u );
+TYPED_TEST( StaticVectorTest, lpNorm )
+   using VectorType = typename TestFixture::VectorType;
+   using RealType = typename TestFixture::RealType;
+   constexpr int size = VectorType::size;
+   const RealType epsilon = std::numeric_limits< RealType >::epsilon();
+   VectorType v( 1 );
+   const RealType expectedL1norm = size;
+   const RealType expectedL2norm = std::sqrt( size );
+   const RealType expectedL3norm = std::cbrt( size );
+   EXPECT_EQ( v.lpNorm( 1.0 ), expectedL1norm );
+   EXPECT_EQ( v.lpNorm( 2.0 ), expectedL2norm );
+   EXPECT_NEAR( v.lpNorm( 3.0 ), expectedL3norm, epsilon );
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+   throw GtestMissingError();
diff --git a/src/UnitTests/Containers/StaticVectorTest.cu b/src/UnitTests/Containers/StaticVectorTest.cu
deleted file mode 100644
index 693ace5c43590a06a2f8c7c087a0d1480a35d02a..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/StaticVectorTest.cu
+++ /dev/null
@@ -1,11 +0,0 @@
-                          StaticVectorTest.cu  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#include "StaticVectorTest.h"
diff --git a/src/UnitTests/Containers/StaticVectorTest.h b/src/UnitTests/Containers/StaticVectorTest.h
deleted file mode 100644
index 038ea0f4c7959a36e6cb3bcb8852267ca321e013..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/StaticVectorTest.h
+++ /dev/null
@@ -1,74 +0,0 @@
-                          StaticVectorTester.h  -  description
-                             -------------------
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#pragma once
-#include <TNL/Containers/StaticVector.h>
-#ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
-using namespace TNL;
-#ifdef HAVE_GTEST
-const int Size( 16 );
-typedef double RealType;
-TEST( StaticVectorTest, testOperators )
-   Containers::StaticVector< Size, RealType > u1( 1.0 ), u2( 2.0 ), u3( 3.0 );
-   u1 += u2;
-   ASSERT_TRUE( u1[ 0 ] == 3.0 );
-   ASSERT_TRUE( u1[ Size - 1 ] == 3.0 );
-   u1 -= u2;
-   ASSERT_TRUE( u1[ 0 ] == 1.0 );
-   ASSERT_TRUE( u1[ Size - 1 ] == 1.0 );
-   u1 *= 2.0;
-   ASSERT_TRUE( u1[ 0 ] == 2.0 );
-   ASSERT_TRUE( u1[ Size - 1 ] == 2.0 );
-   u3 = u1 + u2;
-   ASSERT_TRUE( u3[ 0 ] == 4.0 );
-   ASSERT_TRUE( u3[ Size - 1 ] == 4.0 );
-   u3 = u1 - u2;
-   ASSERT_TRUE( u3[ 0 ] == 0.0 );
-   ASSERT_TRUE( u3[ Size - 1 ] == 0.0 );
-   u3 = u1 * 2.0;
-   ASSERT_TRUE( u3[ 0 ] == 4.0 );
-   ASSERT_TRUE( u3[ Size - 1 ] == 4.0 );
-   ASSERT_TRUE( u1 * u2 == 4.0 * Size );
-   ASSERT_TRUE( u1 < u3 );
-   ASSERT_TRUE( u1 <= u3 );
-   ASSERT_TRUE( u1 <= u2 );
-   ASSERT_TRUE( u3 > u1 );
-   ASSERT_TRUE( u3 >= u1 );
-   ASSERT_TRUE( u2 >= u1 );
-int main( int argc, char* argv[] )
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
diff --git a/src/UnitTests/Containers/VectorOperationsTest.cu b/src/UnitTests/Containers/VectorOperationsTest.cu
deleted file mode 100644
index 61d3b0357b9fb1c3e5b8decfe74ed6cae9446c25..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/VectorOperationsTest.cu
+++ /dev/null
@@ -1,11 +0,0 @@
-                          Devices::CudaVectorOperationsTest.cu  -  description
-                             -------------------
-    begin                : Mar 31, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#include "VectorOperationsTest.h"
diff --git a/src/UnitTests/Containers/VectorOperationsTest.h b/src/UnitTests/Containers/VectorOperationsTest.h
deleted file mode 100644
index 08d18b1704ca0c23ee32928551b55a5feb96c772..0000000000000000000000000000000000000000
--- a/src/UnitTests/Containers/VectorOperationsTest.h
+++ /dev/null
@@ -1,342 +0,0 @@
-                          VectorOperationsTester.h  -  description
-                             -------------------
-    begin                : Mar 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#pragma once
-#ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/VectorOperations.h>
-using namespace TNL;
-using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
-#ifdef HAVE_GTEST
-typedef double Real;
-typedef Devices::Host Device;
-typedef int Index;
-template< typename Vector >
-void setLinearSequence( Vector& deviceVector )
-   Containers::Vector< typename Vector :: RealType, Devices::Host > a;
-   a. setSize( deviceVector. getSize() );
-   for( int i = 0; i < a. getSize(); i ++ )
-      a. getData()[ i ] = i;
-   ArrayOperations< typename Vector::DeviceType,
-                       Devices::Host >::
-   template copyMemory< typename Vector::RealType,
-                        typename Vector::RealType,
-                        typename Vector::IndexType >
-                      ( deviceVector.getData(),
-                        a.getData(),
-                        a.getSize() );
-template< typename Vector >
-void setOnesSequence( Vector& deviceVector )
-   Containers::Vector< typename Vector :: RealType, Devices::Host > a;
-   a. setSize( deviceVector. getSize() );
-   for( int i = 0; i < a. getSize(); i ++ )
-      a. getData()[ i ] = 1;
-   ArrayOperations< typename Vector::DeviceType,
-                       Devices::Host >::
-   template copyMemory< typename Vector::RealType,
-                        typename Vector::RealType,
-                        typename Vector::IndexType >
-                      ( deviceVector.getData(),
-                        a.getData(),
-                        a.getSize() );
-template< typename Vector >
-void setNegativeLinearSequence( Vector& deviceVector )
-   Containers::Vector< typename Vector :: RealType, Devices::Host > a;
-   a. setSize( deviceVector. getSize() );
-   for( int i = 0; i < a. getSize(); i ++ )
-      a. getData()[ i ] = -i;
-   ArrayOperations< typename Vector::DeviceType,
-                       Devices::Host >::
-   template copyMemory< typename Vector::RealType,
-                        typename Vector::RealType,
-                        typename Vector::IndexType >
-                      ( deviceVector.getData(),
-                        a.getData(),
-                        a.getSize() );
-template< typename Vector >
-void setOscilatingSequence( Vector& deviceVector,
-                            typename Vector::RealType v )
-   Containers::Vector< typename Vector::RealType, Devices::Host > a;
-   a.setSize( deviceVector. getSize() );
-   a[ 0 ] = v;
-   for( int i = 1; i < a. getSize(); i ++ )
-      a.getData()[ i ] = a.getData()[ i-1 ] * -1;
-   ArrayOperations< typename Vector::DeviceType,
-                       Devices::Host >::
-   template copyMemory< typename Vector::RealType,
-                        typename Vector::RealType,
-                        typename Vector::IndexType >
-                      ( deviceVector.getData(),
-                        a.getData(),
-                        a.getSize() );
-TEST( VectorOperationsTest, getVectorMaxTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > v;
-   v. setSize( size );
-   setLinearSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorMax( v ) == size - 1 );
-TEST( VectorOperationsTest, getVectorMinTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > v;
-   v. setSize( size );
-   setLinearSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorMin( v ) == 0 );
-TEST( VectorOperationsTest, getVectorAbsMaxTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > v;
-   v. setSize( size );
-   setNegativeLinearSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorAbsMax( v ) == size - 1 );
-TEST( VectorOperationsTest, getVectorAbsMinTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > v;
-   v. setSize( size );
-   setNegativeLinearSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorAbsMin( v ) == 0 );
-TEST( VectorOperationsTest, getVectorLpNormTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > v;
-   v. setSize( size );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorLpNorm( v, 2.0 ) == ::sqrt( size ) );
-TEST( VectorOperationsTest, getVectorSumTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > v;
-   v. setSize( size );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorSum( v ) == size );
-   setLinearSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorSum( v ) == ( ( Real ) size ) * ( ( Real ) size - 1 ) / 2 );
-TEST( VectorOperationsTest, getVectorDifferenceMaxTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   setLinearSequence( u );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceMax( u, v ) == size - 2 );
-TEST( VectorOperationsTest, getVectorDifferenceMinTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   setLinearSequence( u );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceMin( u, v ) == -1 );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceMin( v, u ) == -123454 );
-TEST( VectorOperationsTest, getVectorDifferenceAbsMaxTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   setNegativeLinearSequence( u );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceAbsMax( u, v ) == size );
-TEST( VectorOperationsTest, getVectorDifferenceAbsMinTest )
-   const int size( 123456 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   setLinearSequence( u );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceAbsMin( u, v ) == 0 );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceAbsMin( v, u ) == 0 );
-TEST( VectorOperationsTest, getVectorDifferenceLpNormTest )
-   const int size( 1024 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   u. setValue( 3.0 );
-   v. setValue( 1.0 );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceLpNorm( u, v, 1.0 ) == 2.0 * size );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceLpNorm( u, v, 2.0 ) == ::sqrt( 4.0 * size ) );
-TEST( VectorOperationsTest, getVectorDifferenceSumTest )
-   const int size( 1024 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   u. setValue( 3.0 );
-   v. setValue( 1.0 );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getVectorDifferenceSum( u, v ) == 2.0 * size );
-TEST( VectorOperationsTest, vectorScalarMultiplicationTest )
-   const int size( 1025 );
-   Containers::Vector< Real, Device > u;
-   u. setSize( size );
-   setLinearSequence( u );
-   Containers::VectorOperations< Device >::vectorScalarMultiplication( u, 3.0 );
-   for( int i = 0; i < size; i++ )
-      ASSERT_TRUE( u.getElement( i ) == 3.0 * i );
-TEST( VectorOperationsTest, getVectorScalarProductTest )
-   const int size( 1025 );
-   Containers::Vector< Real, Device > u, v;
-   u. setSize( size );
-   v. setSize( size );
-   setOscilatingSequence( u, 1.0 );
-   setOnesSequence( v );
-   ASSERT_TRUE( Containers::VectorOperations< Device > :: getScalarProduct( u, v ) == 1.0 );
-TEST( VectorOperationsTest, addVectorTest )
-   const int size( 10000 );
-   Containers::Vector< Real, Device > x, y;
-   x.setSize( size );
-   y.setSize( size );
-   setLinearSequence( x );
-   setOnesSequence( y );
-   Containers::VectorOperations< Device >::addVector( y, x, 3.0 );
-   for( int i = 0; i < size; i ++ )
-      ASSERT_TRUE( y.getElement( i ) == 1.0 + 3.0 * i );
-TEST( VectorOperationsTest, prefixSumTest )
-   const int size( 10000 );
-   Containers::Vector< Real, Device > v;
-   v.setSize( size );
-   setOnesSequence( v );
-   v.computePrefixSum();
-   for( int i = 0; i < size; i++ )
-      ASSERT_TRUE( v.getElement( i ) == i + 1 );
-   v.setValue( 0 );
-   v.computePrefixSum();
-   for( int i = 0; i < size; i++ )
-      ASSERT_TRUE( v.getElement( i ) == 0 );
-   setLinearSequence( v );
-   v.computePrefixSum();
-   for( int i = 1; i < size; i++ )
-      ASSERT_TRUE( v.getElement( i ) - v.getElement( i - 1 ) == i );
-TEST( VectorOperationsTest, exclusivePrefixSumTest )
-   const int size( 10000 );
-   Containers::Vector< Real, Device > v;
-   v.setSize( size );
-   setOnesSequence( v );
-   v.computeExclusivePrefixSum();
-   for( int i = 0; i < size; i++ )
-      ASSERT_TRUE( v.getElement( i ) == i );
-   v.setValue( 0 );
-   v.computeExclusivePrefixSum();
-   for( int i = 0; i < size; i++ )
-      ASSERT_TRUE( v.getElement( i ) == 0 );
-   setLinearSequence( v );
-   v.computeExclusivePrefixSum();
-   for( int i = 1; i < size; i++ )
-      ASSERT_TRUE( v.getElement( i ) - v.getElement( i - 1 ) == i - 1 );
-#endif /* HAVE_GTEST */
-int main( int argc, char* argv[] )
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
diff --git a/src/UnitTests/Containers/VectorTest.cu b/src/UnitTests/Containers/VectorTest.cu
index e36b670972b5cf89558a9a2e13a3db817631ea31..f173d4a5ee71749ebf318fba56430101125c0bd4 100644
--- a/src/UnitTests/Containers/VectorTest.cu
+++ b/src/UnitTests/Containers/VectorTest.cu
@@ -8,4 +8,4 @@
 /* See Copyright Notice in tnl/Copyright */
-#include "tnlVectorTest.h"
+#include "VectorTest.h"
diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h
index 56966b64a2620c3fa4c7d6aef21a805aa7b0b857..2f3832fcda5439e617dfe9cea27bd8a11dadd6cb 100644
--- a/src/UnitTests/Containers/VectorTest.h
+++ b/src/UnitTests/Containers/VectorTest.h
@@ -1,5 +1,5 @@
-                          VectorTester.h  -  description
+                          VectorTest.h  -  description
     begin                : Oct 25, 2010
     copyright            : (C) 2010 by Tomas Oberhuber
@@ -8,206 +8,526 @@
 /* See Copyright Notice in tnl/Copyright */
+// NOTE: Vector = Array + VectorOperations, so we test Vector and VectorOperations at the same time
 #pragma once
+#ifdef HAVE_GTEST
+#include <limits>
 #include <TNL/Containers/Vector.h>
 #include <TNL/File.h>
 #include <TNL/Math.h>
+#include "gtest/gtest.h"
 using namespace TNL;
+using namespace TNL::Containers;
+using namespace TNL::Containers::Algorithms;
+// should be small enough to have fast tests, but larger than minGPUReductionDataSize
+// and large enough to require multiple CUDA blocks for reduction
+constexpr int VECTOR_TEST_SIZE = 5000;
-#ifdef HAVE_GTEST
-TEST( VectorTest, testMax )
+template< typename Vector >
+void setLinearSequence( Vector& deviceVector )
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v. setElement( i, i );
-   ASSERT_TRUE( v. max() == 9 );
+   typename Vector::HostType a;
+   a.setLike( deviceVector );
+   for( int i = 0; i < a.getSize(); i++ )
+      a[ i ] = i;
+   deviceVector = a;
-TEST( VectorTest, testMin )
+template< typename Vector >
+void setConstantSequence( Vector& deviceVector,
+                          typename Vector::RealType v )
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v. setElement( i, i );
-   ASSERT_TRUE( v. min() == 0 );
+   deviceVector.setValue( v );
-TEST( VectorTest, testAbsMax )
+template< typename Vector >
+void setNegativeLinearSequence( Vector& deviceVector )
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v.setElement( i, -i );
-   ASSERT_TRUE( v. absMax() == 9 );
+   typename Vector::HostType a;
+   a.setLike( deviceVector );
+   for( int i = 0; i < a.getSize(); i++ )
+      a[ i ] = -i;
+   deviceVector = a;
-TEST( VectorTest, testAbsMin )
+template< typename Vector >
+void setOscilatingSequence( Vector& deviceVector,
+                            typename Vector::RealType v )
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v.setElement( i,  -i );
-   ASSERT_TRUE( v. absMin() == 0 );
+   typename Vector::HostType a;
+   a.setLike( deviceVector );
+   a[ 0 ] = v;
+   for( int i = 1; i < a.getSize(); i++ )
+      a[ i ] = a[ i-1 ] * -1;
+   deviceVector = a;
+// TODO: test everything with OpenMP with different number of threads
-TEST( VectorTest, testLpNorm )
+// test fixture for typed tests
+template< typename Vector >
+class VectorTest : public ::testing::Test
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v.setElement(  i, -2 );
-   ASSERT_TRUE( isSmall( v.lpNorm( 1 ) - 20.0 ) );
-   ASSERT_TRUE( isSmall( v.lpNorm( 2 ) - ::sqrt( 40.0 ) ) );
-   ASSERT_TRUE( isSmall( v.lpNorm( 3 ) - ::pow( 80.0, 1.0/3.0 ) ) );
+   using VectorType = Vector;
+   using VectorOperations = Algorithms::VectorOperations< typename VectorType::DeviceType >;
-TEST( VectorTest, testSum )
+// types for which VectorTest is instantiated
+using VectorTypes = ::testing::Types<
+   Vector< int,    Devices::Host, short >,
+   Vector< long,   Devices::Host, short >,
+   Vector< float,  Devices::Host, short >,
+   Vector< double, Devices::Host, short >,
+   Vector< int,    Devices::Host, int >,
+   Vector< long,   Devices::Host, int >,
+   Vector< float,  Devices::Host, int >,
+   Vector< double, Devices::Host, int >,
+   Vector< int,    Devices::Host, long >,
+   Vector< long,   Devices::Host, long >,
+   Vector< float,  Devices::Host, long >,
+   Vector< double, Devices::Host, long >
+#ifdef HAVE_CUDA
+   ,
+   Vector< int,    Devices::Cuda, short >,
+   Vector< long,   Devices::Cuda, short >,
+   Vector< float,  Devices::Cuda, short >,
+   Vector< double, Devices::Cuda, short >,
+   Vector< int,    Devices::Cuda, int >,
+   Vector< long,   Devices::Cuda, int >,
+   Vector< float,  Devices::Cuda, int >,
+   Vector< double, Devices::Cuda, int >,
+   Vector< int,    Devices::Cuda, long >,
+   Vector< long,   Devices::Cuda, long >,
+   Vector< float,  Devices::Cuda, long >,
+   Vector< double, Devices::Cuda, long >
+#ifdef HAVE_MIC
+   ,
+   Vector< int,    Devices::MIC, short >,
+   Vector< long,   Devices::MIC, short >,
+   Vector< float,  Devices::MIC, short >,
+   Vector< double, Devices::MIC, short >,
+   Vector< int,    Devices::MIC, int >,
+   Vector< long,   Devices::MIC, int >,
+   Vector< float,  Devices::MIC, int >,
+   Vector< double, Devices::MIC, int >,
+   Vector< int,    Devices::MIC, long >,
+   Vector< long,   Devices::MIC, long >,
+   Vector< float,  Devices::MIC, long >,
+   Vector< double, Devices::MIC, long >
+TYPED_TEST_CASE( VectorTest, VectorTypes );
+TYPED_TEST( VectorTest, max )
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v.setElement( i, -2 );
-   ASSERT_TRUE( v. sum() == -20.0 );
-   for( int i = 0; i < 10; i ++ )
-      v.setElement( i,  2 );
-   ASSERT_TRUE( v. sum() == 20.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType v;
+   v.setSize( size );
+   setLinearSequence( v );
+   EXPECT_EQ( v.max(), size - 1 );
+   EXPECT_EQ( VectorOperations::getVectorMax( v ), size - 1 );
-TEST( VectorTest, testDifferenceMax )
+TYPED_TEST( VectorTest, min )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i,  i );
-      v2.setElement( i, -i );
-   }
-   ASSERT_TRUE( v1. differenceMax( v2 ) == 18.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType v;
+   v.setSize( size );
+   setLinearSequence( v );
+   EXPECT_EQ( v.min(), 0 );
+   EXPECT_EQ( VectorOperations::getVectorMin( v ), 0 );
-TEST( VectorTest, testDifferenceMin )
+TYPED_TEST( VectorTest, absMax )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i, i );
-      v2.setElement( i, -i );
-   }
-   ASSERT_TRUE( v1. differenceMin( v2 ) == 0.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType v;
+   v.setSize( size );
+   setNegativeLinearSequence( v );
-TEST( VectorTest, testDifferenceAbsMax )
+   EXPECT_EQ( v.absMax(), size - 1 );
+   EXPECT_EQ( VectorOperations::getVectorAbsMax( v ), size - 1 );
+TYPED_TEST( VectorTest, absMin )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i, -i );
-      v2.setElement( i, i );
-   }
-   ASSERT_TRUE( v1. differenceAbsMax( v2 ) == 18.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
-TEST( VectorTest, testDifferenceAbsMin )
+   VectorType v;
+   v.setSize( size );
+   setNegativeLinearSequence( v );
+   EXPECT_EQ( v.absMin(), 0 );
+   EXPECT_EQ( VectorOperations::getVectorAbsMin( v ), 0 );
+TYPED_TEST( VectorTest, lpNorm )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i, -i );
-      v2.setElement( i, i );
-   }
-   ASSERT_TRUE( v1. differenceAbsMin( v2 ) == 0.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using RealType = typename VectorType::RealType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon();
+   VectorType v;
+   v.setSize( size );
+   setConstantSequence( v, 1 );
+   const RealType expectedL1norm = size;
+   const RealType expectedL2norm = std::sqrt( size );
+   const RealType expectedL3norm = std::cbrt( size );
+   EXPECT_EQ( v.lpNorm( 1.0 ), expectedL1norm );
+   EXPECT_EQ( v.lpNorm( 2.0 ), expectedL2norm );
+   EXPECT_NEAR( v.lpNorm( 3.0 ), expectedL3norm, epsilon );
+   EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 1.0 ), expectedL1norm );
+   EXPECT_EQ( VectorOperations::getVectorLpNorm( v, 2.0 ), expectedL2norm );
+   EXPECT_NEAR( VectorOperations::getVectorLpNorm( v, 3.0 ), expectedL3norm, epsilon );
-TEST( VectorTest, testDifferenceLpNorm )
+TYPED_TEST( VectorTest, sum )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i, -1 );
-      v2.setElement( i, 1 );
-   }
-   ASSERT_TRUE( isSmall( v1.differenceLpNorm( v2, 1.0 ) - 20.0 ) );
-   ASSERT_TRUE( isSmall( v1.differenceLpNorm( v2, 2.0 ) - ::sqrt( 40.0 ) ) );
-   ASSERT_TRUE( isSmall( v1.differenceLpNorm( v2, 3.0 ) - ::pow( 80.0, 1.0/3.0 ) ) );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   // this test expect an even size
+   const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE;
+   VectorType v;
+   v.setSize( size );
+   setConstantSequence( v, 1 );
+   EXPECT_EQ( v.sum(), size );
+   EXPECT_EQ( VectorOperations::getVectorSum( v ), size );
-TEST( VectorTest, testDifferenceSum )
+   setLinearSequence( v );
+   EXPECT_EQ( v.sum(), 0.5 * size * ( size - 1 ) );
+   EXPECT_EQ( VectorOperations::getVectorSum( v ), 0.5 * size * ( size - 1 ) );
+   setNegativeLinearSequence( v );
+   EXPECT_EQ( v.sum(), - 0.5 * size * ( size - 1 ) );
+   EXPECT_EQ( VectorOperations::getVectorSum( v ), - 0.5 * size * ( size - 1 ) );
+   setOscilatingSequence( v, 1.0 );
+   EXPECT_EQ( v.sum(), 0 );
+   EXPECT_EQ( VectorOperations::getVectorSum( v ), 0 );
+TYPED_TEST( VectorTest, differenceMax )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i, -1 );
-      v2.setElement( i, 1 );
-   }
-   ASSERT_TRUE( v1. differenceSum( v2 ) == -20.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
-TEST( VectorTest, testScalarMultiplication )
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   setLinearSequence( u );
+   setConstantSequence( v, size / 2 );
+   EXPECT_EQ( u.differenceMax( v ), size - 1 - size / 2 );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceMax( u, v ), size - 1 - size / 2 );
+TYPED_TEST( VectorTest, differenceMin )
-   Containers::Vector< RealType, Device, IndexType > v;
-   v. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-      v.setElement( i, i );
-   v. scalarMultiplication( 5.0 );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
-   for( int i = 0; i < 10; i ++ )
-      ASSERT_TRUE( v. getElement( i ) == 5 * i );
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   setLinearSequence( u );
+   setConstantSequence( v, size / 2 );
+   EXPECT_EQ( u.differenceMin( v ), - size / 2 );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceMin( u, v ), - size / 2 );
+   EXPECT_EQ( v.differenceMin( u ), size / 2 - size + 1 );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceMin( v, u ), size / 2 - size + 1 );
+TYPED_TEST( VectorTest, differenceAbsMax )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   // this test expects an odd size
+   const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1;
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   setNegativeLinearSequence( u );
+   setConstantSequence( v, - size / 2 );
-TEST( VectorTest, testScalarProduct )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   v1.setElement( 0, -1 );
-   v2.setElement( 0, 1 );
-   for( int i = 1; i < 10; i ++ )
-   {
-      v1.setElement( i, v1.getElement( i - 1 ) * -1 );
-      v2.setElement( i, v2.getElement( i - 1 ) );
+   EXPECT_EQ( u.differenceAbsMax( v ), size - 1 - size / 2 );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMax( u, v ), size - 1 - size / 2 );
+TYPED_TEST( VectorTest, differenceAbsMin )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   setNegativeLinearSequence( u );
+   setConstantSequence( v, - size / 2 );
+   EXPECT_EQ( u.differenceAbsMin( v ), 0 );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( u, v ), 0 );
+   EXPECT_EQ( v.differenceAbsMin( u ), 0 );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceAbsMin( v, u ), 0 );
+TYPED_TEST( VectorTest, differenceLpNorm )
+   using VectorType = typename TestFixture::VectorType;
+   using RealType = typename VectorType::RealType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   const RealType epsilon = 64 * std::numeric_limits< RealType >::epsilon();
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   u.setValue( 3.0 );
+   v.setValue( 1.0 );
+   const RealType expectedL1norm = 2.0 * size;
+   const RealType expectedL2norm = std::sqrt( 4.0 * size );
+   const RealType expectedL3norm = std::cbrt( 8.0 * size );
+   EXPECT_EQ( u.differenceLpNorm( v, 1.0 ), expectedL1norm );
+   EXPECT_EQ( u.differenceLpNorm( v, 2.0 ), expectedL2norm );
+   EXPECT_NEAR( u.differenceLpNorm( v, 3.0 ), expectedL3norm, epsilon );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 1.0 ), expectedL1norm );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceLpNorm( u, v, 2.0 ), expectedL2norm );
+   EXPECT_NEAR( VectorOperations::getVectorDifferenceLpNorm( u, v, 3.0 ), expectedL3norm, epsilon );
+TYPED_TEST( VectorTest, differenceSum )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   // this test expect an even size
+   const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE - 1 : VECTOR_TEST_SIZE;
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   v.setValue( 1.0 );
+   setConstantSequence( u, 2 );
+   EXPECT_EQ( u.differenceSum( v ), size );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), size );
+   setLinearSequence( u );
+   EXPECT_EQ( u.differenceSum( v ), 0.5 * size * ( size - 1 ) - size );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), 0.5 * size * ( size - 1 ) - size );
+   setNegativeLinearSequence( u );
+   EXPECT_EQ( u.differenceSum( v ), - 0.5 * size * ( size - 1 ) - size );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - 0.5 * size * ( size - 1 ) - size );
+   setOscilatingSequence( u, 1.0 );
+   EXPECT_EQ( u.differenceSum( v ), - size );
+   EXPECT_EQ( VectorOperations::getVectorDifferenceSum( u, v ), - size );
+TYPED_TEST( VectorTest, scalarMultiplication )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType u;
+   u.setSize( size );
+   typename VectorType::HostType expected;
+   expected.setSize( size );
+   for( int i = 0; i < size; i++ )
+      expected[ i ] = 2.0 * i;
+   setLinearSequence( u );
+   VectorOperations::vectorScalarMultiplication( u, 2.0 );
+   EXPECT_EQ( u, expected );
+   setLinearSequence( u );
+   u.scalarMultiplication( 2.0 );
+   EXPECT_EQ( u, expected );
+   setLinearSequence( u );
+   u *= 2.0;
+   EXPECT_EQ( u, expected );
+TYPED_TEST( VectorTest, scalarProduct )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   // this test expects an odd size
+   const int size = VECTOR_TEST_SIZE % 2 ? VECTOR_TEST_SIZE : VECTOR_TEST_SIZE - 1;
+   VectorType u, v;
+   u.setSize( size );
+   v.setSize( size );
+   setOscilatingSequence( u, 1.0 );
+   setConstantSequence( v, 1 );
+   EXPECT_EQ( u.scalarProduct( v ), 1.0 );
+   EXPECT_EQ( VectorOperations::getScalarProduct( u, v ), 1.0 );
+TYPED_TEST( VectorTest, addVector )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType x, y;
+   x.setSize( size );
+   y.setSize( size );
+   typename VectorType::HostType expected1, expected2;
+   expected1.setSize( size );
+   expected2.setSize( size );
+   for( int i = 0; i < size; i++ ) {
+      expected1[ i ] = 2.0 + 3.0 * i;
+      expected2[ i ] = 1.0 + 3.0 * i;
-   ASSERT_TRUE( v1. scalarProduct( v2 ) == 0.0 );
-TEST( VectorTest, addVectorTest )
+   setConstantSequence( x, 1 );
+   setLinearSequence( y );
+   VectorOperations::addVector( x, y, 3.0, 2.0 );
+   EXPECT_EQ( x, expected1 );
+   setConstantSequence( x, 1 );
+   setLinearSequence( y );
+   x.addVector( y, 3.0, 1.0 );
+   EXPECT_EQ( x, expected2 );
+TYPED_TEST( VectorTest, addVectors )
-   Containers::Vector< RealType, Device, IndexType > v1, v2;
-   v1. setSize( 10 );
-   v2. setSize( 10 );
-   for( int i = 0; i < 10; i ++ )
-   {
-      v1.setElement( i, i );
-      v2.setElement( i, 2.0 * i );
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType x, y, z;
+   x.setSize( size );
+   y.setSize( size );
+   z.setSize( size );
+   typename VectorType::HostType expected1, expected2;
+   expected1.setSize( size );
+   expected2.setSize( size );
+   for( int i = 0; i < size; i++ ) {
+      expected1[ i ] = 1.0 + 3.0 * i + 2.0;
+      expected2[ i ] = 2.0 + 3.0 * i + 2.0;
-   v1. addVector( v2, 2.0 );
-   for( int i = 0; i < 10; i ++ )
-      ASSERT_TRUE( v1. getElement( i ) == 5.0 * i );
-#endif /* HAVE_GTEST */
+   setConstantSequence( x, 1 );
+   setLinearSequence( y );
+   setConstantSequence( z, 2 );
+   VectorOperations::addVectors( x, y, 3.0, z, 1.0, 1.0 );
+   EXPECT_EQ( x, expected1 );
+   setConstantSequence( x, 1 );
+   setLinearSequence( y );
+   setConstantSequence( z, 2 );
+   x.addVectors( y, 3.0, z, 1.0, 2.0 );
+   EXPECT_EQ( x, expected2 );
+// TODO: fix the CUDA implementations
+TYPED_TEST( VectorTest, prefixSum )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType v;
+   v.setSize( size );
+   setConstantSequence( v, 1 );
+   v.computePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), i + 1 );
+   v.setValue( 0 );
+   v.computePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), 0 );
+   setLinearSequence( v );
+   v.computePrefixSum();
+   for( int i = 1; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i );
+// TODO: fix the CUDA implementations
+TYPED_TEST( VectorTest, exclusivePrefixSum )
+   using VectorType = typename TestFixture::VectorType;
+   using VectorOperations = typename TestFixture::VectorOperations;
+   const int size = VECTOR_TEST_SIZE;
+   VectorType v;
+   v.setSize( size );
+   setConstantSequence( v, 1 );
+   v.computeExclusivePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), i );
+   v.setValue( 0 );
+   v.computeExclusivePrefixSum();
+   for( int i = 0; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ), 0 );
+   setLinearSequence( v );
+   v.computeExclusivePrefixSum();
+   for( int i = 1; i < size; i++ )
+      EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i - 1 );
+// TODO: test prefix sum with custom begin and end parameters
+#endif // HAVE_GTEST
+#include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/UnitTests/FileTest.h b/src/UnitTests/FileTest.h
index 82b8b286c6ba576b95569b0ab7fc5d2ec4e1cda8..b1385f4122924185cfa898b569238e0118aa84af 100644
--- a/src/UnitTests/FileTest.h
+++ b/src/UnitTests/FileTest.h
@@ -1,8 +1,8 @@
-                          tnlFileTester.h  -  description
+                          FileTest.h  -  description
     begin                : Oct 24, 2010
-    copyright            : (C) 2010 by Tomas Oberhuber
+    copyright            : (C) 2010 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
@@ -11,60 +11,43 @@
 #include <TNL/File.h>
 #ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
-#ifdef HAVE_CUDA
-#include <cuda.h>
+#include <gtest/gtest.h>
 using namespace TNL;
-#ifdef HAVE_GTEST
+TEST( FileTest, CloseEmpty )
+   File file;
+   ASSERT_TRUE( file.close() );
 TEST( FileTest, WriteAndRead )
    File file;
-   if( ! file. open( String( "test-file.tnl" ), tnlWriteMode ) )
-   {
-      std::cerr << "Unable to create file test-file.tnl for the testing." << std::endl;
-      return;
-   }
+   ASSERT_TRUE( file.open( String( "test-file.tnl" ), IOMode::write ) );
    int intData( 5 );
-#ifdef HAVE_NOT_CXX11
-   file. write< int, Devices::Host >( &intData );
-   file. write( &intData );
    double doubleData[ 3 ] = { 1.0, 2.0, 3.0 };
-#ifdef HAVE_NOT_CXX11
-   file. write< double, Devices::Host >( doubleData, 3 );
-   file. write( doubleData, 3 );
-   if( ! file. close() )
-   {
-      std::cerr << "Unable to close the file test-file.tnl" << std::endl;
-      return;
-   }
-   if( ! file. open( String( "test-file.tnl" ), tnlReadMode ) )
-   {
-      std::cerr << "Unable to open the file test-file.tnl for the testing." << std::endl;
-      return;
-   }
+   const double constDoubleData = 3.14;
+   ASSERT_TRUE( file.write( &intData ) );
+   ASSERT_TRUE( file.write( doubleData, 3 ) );
+   ASSERT_TRUE( file.write( &constDoubleData ) );
+   ASSERT_TRUE( file.close() );
+   ASSERT_TRUE( file.open( String( "test-file.tnl" ), IOMode::read ) );
    int newIntData;
    double newDoubleData[ 3 ];
-#ifdef HAVE_NOT_CXX11
-   file. read< int, Devices::Host >( &newIntData );
-   file. read< double, Devices::Host >( newDoubleData, 3 );
-   file. read( &newIntData, 1 );
-   file. read( newDoubleData, 3 );
+   double newConstDoubleData;
+   ASSERT_TRUE( file.read( &newIntData, 1 ) );
+   ASSERT_TRUE( file.read( newDoubleData, 3 ) );
+   ASSERT_TRUE( file.read( &newConstDoubleData, 1 ) );
-   ASSERT_EQ( newIntData, intData );
+   EXPECT_EQ( newIntData, intData );
    for( int i = 0; i < 3; i ++ )
-      ASSERT_EQ( newDoubleData[ i ], doubleData[ i ] );
+      EXPECT_EQ( newDoubleData[ i ], doubleData[ i ] );
+   EXPECT_EQ( newConstDoubleData, constDoubleData );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
 #ifdef HAVE_CUDA
@@ -72,11 +55,14 @@ TEST( FileTest, WriteAndReadCUDA )
    int intData( 5 );
    float floatData[ 3 ] = { 1.0, 2.0, 3.0 };
+   const double constDoubleData = 3.14;
    int* cudaIntData;
    float* cudaFloatData;
+   const double* cudaConstDoubleData;
    cudaMalloc( ( void** ) &cudaIntData, sizeof( int ) );
    cudaMalloc( ( void** ) &cudaFloatData, 3 * sizeof( float ) );
+   cudaMalloc( ( void** ) &cudaConstDoubleData, sizeof( double ) );
    cudaMemcpy( cudaIntData,
                sizeof( int ),
@@ -85,34 +71,38 @@ TEST( FileTest, WriteAndReadCUDA )
                3 * sizeof( float ),
                cudaMemcpyHostToDevice );
+   cudaMemcpy( (void*) cudaConstDoubleData,
+               &constDoubleData,
+               sizeof( double ),
+               cudaMemcpyHostToDevice );
    File file;
-   if( ! file. open( String( "test-file.tnl" ), tnlWriteMode ) )
-   {
-      std::cerr << "Unable to create file test-file.tnl for the testing." << std::endl;
-      return;
-   }
-   file. write< int, Devices::Cuda >( cudaIntData );
-   file. write< float, Devices::Cuda, int >( cudaFloatData, 3 );
-   if( ! file. close() )
-   {
-      std::cerr << "Unable to close the file test-file.tnl" << std::endl;
-      return;
-   }
-   if( ! file. open( String( "test-file.tnl" ), tnlReadMode ) )
-   {
-      std::cerr << "Unable to open the file test-file.tnl for the testing." << std::endl;
-      return;
-   }
+   ASSERT_TRUE( file.open( String( "test-file.tnl" ), IOMode::write ) );
+   bool status = file.write< int, Devices::Cuda >( cudaIntData );
+   ASSERT_TRUE( status );
+   status = file.write< float, Devices::Cuda, int >( cudaFloatData, 3 );
+   ASSERT_TRUE( status );
+   status = file.write< const double, Devices::Cuda >( cudaConstDoubleData );
+   ASSERT_TRUE( status );
+   ASSERT_TRUE( file.close() );
+   ASSERT_TRUE( file.open( String( "test-file.tnl" ), IOMode::read ) );
    int newIntData;
    float newFloatData[ 3 ];
+   double newDoubleData;
    int* newCudaIntData;
    float* newCudaFloatData;
+   double* newCudaDoubleData;
    cudaMalloc( ( void** ) &newCudaIntData, sizeof( int ) );
    cudaMalloc( ( void** ) &newCudaFloatData, 3 * sizeof( float ) );
-   file. read< int, Devices::Cuda >( newCudaIntData, 1 );
-   file. read< float, Devices::Cuda, int >( newCudaFloatData, 3 );
+   cudaMalloc( ( void** ) &newCudaDoubleData, sizeof( double ) );
+   status = file.read< int, Devices::Cuda >( newCudaIntData, 1 );
+   ASSERT_TRUE( status );
+   status = file.read< float, Devices::Cuda, int >( newCudaFloatData, 3 );
+   ASSERT_TRUE( status );
+   status = file.read< double, Devices::Cuda >( newCudaDoubleData, 1 );
+   ASSERT_TRUE( status );
    cudaMemcpy( &newIntData,
                sizeof( int ),
@@ -121,20 +111,28 @@ TEST( FileTest, WriteAndReadCUDA )
                3 * sizeof( float ),
                cudaMemcpyDeviceToHost );
+   cudaMemcpy( &newDoubleData,
+               newCudaDoubleData,
+               sizeof( double ),
+               cudaMemcpyDeviceToHost );
-   ASSERT_EQ( newIntData, intData );
+   EXPECT_EQ( newIntData, intData );
    for( int i = 0; i < 3; i ++ )
-      ASSERT_EQ( newFloatData[ i ], floatData[ i ] );
+      EXPECT_EQ( newFloatData[ i ], floatData[ i ] );
+   EXPECT_EQ( newDoubleData, constDoubleData );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
+#include "GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/UnitTests/GtestMissingError.h b/src/UnitTests/GtestMissingError.h
new file mode 100644
index 0000000000000000000000000000000000000000..b308a16c8bb02d6afa38f097c48a9242c0512e08
--- /dev/null
+++ b/src/UnitTests/GtestMissingError.h
@@ -0,0 +1,21 @@
+                          GtestMissingError.h  -  description
+                             -------------------
+    begin                : Jul 2, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#pragma once
+#include <stdexcept>
+struct GtestMissingError
+   : public std::runtime_error
+   GtestMissingError()
+   : std::runtime_error( "The GTest library is needed to run the tests." )
+   {}
diff --git a/src/UnitTests/ListTest.cpp b/src/UnitTests/ListTest.cpp
deleted file mode 100644
index 6774d29f578e911d42e919d2dd8ff0b9f81a1610..0000000000000000000000000000000000000000
--- a/src/UnitTests/ListTest.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-                          tnlListTest.cpp  -  description
-                             -------------------
-    begin                : Feb 15, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-/* See Copyright Notice in tnl/Copyright */
-#ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
-#include <TNL/Containers/List.h>
-using namespace TNL;
-#ifdef HAVE_GTEST 
-int main( int argc, char* argv[] )
-#ifdef HAVE_GTEST
-   ::testing::InitGoogleTest( &argc, argv );
-   return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d1a78b2bb0699f5adb68af4c6eda8d5b9791dbf
--- /dev/null
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -0,0 +1,13 @@
+   CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest${mpiExt}${debugExt} SparseMatrixCopyTest.h SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
+                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+   ADD_EXECUTABLE( SparseMatrixCopyTest${mpiExt}${debugExt} SparseMatrixCopyTest.h SparseMatrixCopyTest.cpp )
+   TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest${mpiExt}${debugExt} PRIVATE ${CXX_TESTS_FLAGS} )
+   TARGET_LINK_LIBRARIES( SparseMatrixCopyTest${mpiExt}${debugExt} ${GTEST_BOTH_LIBRARIES}
+                                                           tnl${mpiExt}${debugExt}-${tnlVersion} )
+ADD_TEST( SparseMatrixCopyTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${mpiExt}${debugExt} )
diff --git a/src/UnitTests/Containers/VectorOperationsTest.cpp b/src/UnitTests/Matrices/SparseMatrixCopyTest.cpp
similarity index 60%
rename from src/UnitTests/Containers/VectorOperationsTest.cpp
rename to src/UnitTests/Matrices/SparseMatrixCopyTest.cpp
index b5a1b57f466d5d3ee768ab2247fa8989bc294266..30b8f64ecfdbf228856d272a71d3de08980f3987 100644
--- a/src/UnitTests/Containers/VectorOperationsTest.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.cpp
@@ -1,12 +1,11 @@
-                          VectorOperationsTest.cpp  -  description
+                          SparseMatrixCopyTest.cpp  -  description
-    begin                : Jul 15, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
 /* See Copyright Notice in tnl/Copyright */
-#include "VectorOperationsTest.h"
+#include "SparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Containers/StaticArrayTest.cu b/src/UnitTests/Matrices/SparseMatrixCopyTest.cu
similarity index 60%
rename from src/UnitTests/Containers/StaticArrayTest.cu
rename to src/UnitTests/Matrices/SparseMatrixCopyTest.cu
index 00fa118fd23a8e5898f6c8736172cc3a70a4fe46..431fe481c2db1d5b18cfa849e882c0ed836463c1 100644
--- a/src/UnitTests/Containers/StaticArrayTest.cu
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.cu
@@ -1,12 +1,11 @@
-                          StaticArrayTest.cu  -  description
+                          SparseMatrixCopyTest.cu  -  description
-    begin                : Feb 10, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
     email                : tomas.oberhuber@fjfi.cvut.cz
 /* See Copyright Notice in tnl/Copyright */
-#include "StaticArrayTest.h"
+#include "SparseMatrixCopyTest.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..a11a8b4442527b371603b8f5d43b70b8e4ff558d
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -0,0 +1,281 @@
+                          SparseMatrixCopyTest.h -  description
+                             -------------------
+    begin                : Jun 25, 2017
+    copyright            : (C) 2017 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include <TNL/Matrices/CSR.h>
+#include <TNL/Matrices/Ellpack.h>
+#include <TNL/Matrices/SlicedEllpack.h>
+using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >;
+using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >;
+using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >;
+using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >;
+using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >;
+using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;
+#ifdef HAVE_GTEST 
+#include <gtest/gtest.h>
+ * Sets up the following 7x6 sparse matrix:
+ *
+ *    / 1  2             \
+ *    | 3  4  5          |
+ *    |    6  7  8       |
+ *    |       9 10 11    |
+ *    |         12 13 14 |
+ *    |            15 16 |
+ *    \               17 /
+ */
+template< typename Matrix >
+void setupMatrix( Matrix& m )
+   const int rows = 7;
+   const int cols = 6;
+   m.reset();
+   m.setDimensions( rows, cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths;
+   rowLengths.setSize( rows );
+   rowLengths.setValue( 3 );
+   rowLengths.setElement( 0 , 4 );
+   rowLengths.setElement( 1,  4 );
+   m.setCompressedRowLengths( rowLengths );
+   int value = 1;
+   for( int i = 0; i < rows; i++ )
+      for( int j = 0; j < 3; j++ )
+         if( i + j - 1 >= 0 && i + j - 1 < cols )
+            m.setElement( i, i + j - 1, value++ );
+template< typename Matrix >
+void checkMatrix( Matrix& m )
+   ASSERT_EQ( m.getRows(), 7 );
+   ASSERT_EQ( m.getColumns(), 6 );
+   EXPECT_EQ( m.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( m.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( m.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 0, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 0 ),  3 );
+   EXPECT_EQ( m.getElement( 1, 1 ),  4 );
+   EXPECT_EQ( m.getElement( 1, 2 ),  5 );
+   EXPECT_EQ( m.getElement( 1, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 1, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 1 ),  6 );
+   EXPECT_EQ( m.getElement( 2, 2 ),  7 );
+   EXPECT_EQ( m.getElement( 2, 3 ),  8 );
+   EXPECT_EQ( m.getElement( 2, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 2, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 3, 2 ),  9 );
+   EXPECT_EQ( m.getElement( 3, 3 ), 10 );
+   EXPECT_EQ( m.getElement( 3, 4 ), 11 );
+   EXPECT_EQ( m.getElement( 3, 5 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 4, 3 ), 12 );
+   EXPECT_EQ( m.getElement( 4, 4 ), 13 );
+   EXPECT_EQ( m.getElement( 4, 5 ), 14 );
+   EXPECT_EQ( m.getElement( 5, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 5, 4 ), 15 );
+   EXPECT_EQ( m.getElement( 5, 5 ), 16 );
+   EXPECT_EQ( m.getElement( 6, 0 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 1 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 2 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 3 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 4 ),  0 );
+   EXPECT_EQ( m.getElement( 6, 5 ), 17 );
+template< typename Matrix1, typename Matrix2 >
+void testCopyAssignment()
+   Matrix1 m1;
+   setupMatrix( m1 );
+   checkMatrix( m1 );
+   Matrix2 m2;
+   m2 = m1;
+   checkMatrix( m2 );
+template< typename Matrix1, typename Matrix2 >
+void testConversion()
+   Matrix1 m1;
+   setupMatrix( m1 );
+   checkMatrix( m1 );
+   Matrix2 m2;
+   TNL::Matrices::copySparseMatrix( m2, m1 );
+   checkMatrix( m2 );
+TEST( SparseMatrixCopyTest, CSR_HostToHost )
+   testCopyAssignment< CSR_host, CSR_host >();
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, CSR_HostToCuda )
+   testCopyAssignment< CSR_host, CSR_cuda >();
+TEST( SparseMatrixCopyTest, CSR_CudaToHost )
+   testCopyAssignment< CSR_cuda, CSR_host >();
+TEST( SparseMatrixCopyTest, CSR_CudaToCuda )
+   testCopyAssignment< CSR_cuda, CSR_cuda >();
+TEST( SparseMatrixCopyTest, Ellpack_HostToHost )
+   testCopyAssignment< E_host, E_host >();
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, Ellpack_HostToCuda )
+   testCopyAssignment< E_host, E_cuda >();
+TEST( SparseMatrixCopyTest, Ellpack_CudaToHost )
+   testCopyAssignment< E_cuda, E_host >();
+TEST( SparseMatrixCopyTest, Ellpack_CudaToCuda )
+   testCopyAssignment< E_cuda, E_cuda >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_HostToHost )
+   testCopyAssignment< SE_host, SE_host >();
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, SlicedEllpack_HostToCuda )
+   testCopyAssignment< SE_host, SE_cuda >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToHost )
+   testCopyAssignment< SE_cuda, SE_host >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda )
+   testCopyAssignment< SE_cuda, SE_cuda >();
+// test conversion between formats
+TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host )
+   testConversion< CSR_host, E_host >();
+TEST( SparseMatrixCopyTest, Ellpack_to_CSR_host )
+   testConversion< E_host, CSR_host >();
+TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_host )
+   testConversion< CSR_host, SE_host >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_host )
+   testConversion< SE_host, CSR_host >();
+TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host )
+   testConversion< E_host, SE_host >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host )
+   testConversion< SE_host, E_host >();
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, CSR_to_Ellpack_cuda )
+   testConversion< CSR_cuda, E_cuda >();
+TEST( SparseMatrixCopyTest, Ellpack_to_CSR_cuda )
+   testConversion< E_cuda, CSR_cuda >();
+TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda )
+   testConversion< CSR_cuda, SE_cuda >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda )
+   testConversion< SE_cuda, CSR_cuda >();
+TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda )
+   testConversion< E_cuda, SE_cuda >();
+TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
+   testConversion< SE_cuda, E_cuda >();
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )
+#ifdef HAVE_GTEST
+   ::testing::InitGoogleTest( &argc, argv );
+   return RUN_ALL_TESTS();
+   throw GtestMissingError();
diff --git a/src/UnitTests/ObjectTest.cpp b/src/UnitTests/ObjectTest.cpp
index 3c495846d3de7b62e8c0efb5050970b2fc275fdc..7b9badd8f9d14ce3a9af45249efe9360b07b2e69 100644
--- a/src/UnitTests/ObjectTest.cpp
+++ b/src/UnitTests/ObjectTest.cpp
@@ -14,7 +14,7 @@
 #include <TNL/Containers/Array.h>
 #ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
+#include <gtest/gtest.h>
 using namespace TNL;
@@ -24,11 +24,13 @@ TEST( ObjectTest, SaveAndLoadTest )
    Object testObject;
    File file;
-   file.open( "test-file.tnl", tnlWriteMode );
+   file.open( "test-file.tnl", IOMode::write );
    ASSERT_TRUE( testObject.save( file ) );
-   file.open( "test-file.tnl", tnlReadMode );
+   file.open( "test-file.tnl", IOMode::read );
    ASSERT_TRUE( testObject.load( file ) );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
 TEST( ObjectTest, parseObjectTypeTest )
@@ -104,14 +106,13 @@ TEST( ObjectTest, parseObjectTypeTest )
+#include "GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/UnitTests/StringTest.cpp b/src/UnitTests/StringTest.cpp
index 3a0444ed64801402a95cec5a2ba80aa739e05163..6651455589ceeea3681930e1efb621259381fb46 100644
--- a/src/UnitTests/StringTest.cpp
+++ b/src/UnitTests/StringTest.cpp
@@ -9,11 +9,12 @@
 /* See Copyright Notice in tnl/Copyright */
 #ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
+#include <gtest/gtest.h>
 #include <TNL/String.h>
 #include <TNL/File.h>
+#include <TNL/Containers/List.h>
 using namespace TNL;
@@ -21,7 +22,7 @@ using namespace TNL;
 TEST( StringTest, BasicConstructor )
    String str;
-   ASSERT_EQ( strcmp( str. getString(), "" ), 0 );
+   EXPECT_EQ( strcmp( str.getString(), "" ), 0 );
 TEST( StringTest, ConstructorWithChar )
@@ -31,10 +32,10 @@ TEST( StringTest, ConstructorWithChar )
    String str3( "string3xxx", 0, 3 );
    String str4( "xxxstring4xxx", 3, 3 );
-   ASSERT_EQ( strcmp( str1. getString(), "string1" ), 0 );
-   ASSERT_EQ( strcmp( str2. getString(), "string2" ), 0 );
-   ASSERT_EQ( strcmp( str3. getString(), "string3" ), 0 );
-   ASSERT_EQ( strcmp( str4. getString(), "string4" ), 0 );
+   EXPECT_EQ( strcmp( str1.getString(), "string1" ), 0 );
+   EXPECT_EQ( strcmp( str2.getString(), "string2" ), 0 );
+   EXPECT_EQ( strcmp( str3.getString(), "string3" ), 0 );
+   EXPECT_EQ( strcmp( str4.getString(), "string4" ), 0 );
 TEST( StringTest, CopyConstructor )
@@ -44,103 +45,255 @@ TEST( StringTest, CopyConstructor )
    String string2( string );
    String emptyString2( emptyString );
-   ASSERT_EQ( strcmp( string2. getString(), "string1" ), 0 );
-   ASSERT_EQ( strcmp( emptyString2. getString(), "" ), 0 );
+   EXPECT_EQ( strcmp( string2.getString(), "string1" ), 0 );
+   EXPECT_EQ( strcmp( emptyString2.getString(), "" ), 0 );
 TEST( StringTest, ConstructorWithNumber )
    String string1( 10 );
    String string2( -5 );
+   String string3( true );
+   String string4( false );
-   ASSERT_EQ( strcmp( string1. getString(), "10" ), 0 );
-   ASSERT_EQ( strcmp( string2. getString(), "-5" ), 0 );
+   EXPECT_EQ( strcmp( string1.getString(), "10" ), 0 );
+   EXPECT_EQ( strcmp( string2.getString(), "-5" ), 0 );
+   EXPECT_EQ( strcmp( string3.getString(), "true" ), 0 );
+   EXPECT_EQ( strcmp( string4.getString(), "false" ), 0 );
+TEST( StringTest, SetSize )
+   String str;
+   str.setSize( 42 );
+   EXPECT_EQ( str.getAllocatedSize(), 256 );
+   // it allocates one more byte for the terminating 0
+   str.setSize( 256 );
+   EXPECT_EQ( str.getAllocatedSize(), 512 );
 TEST( StringTest, SetString )
    String str1, str2, str3, str4;
-   str1. setString( "string1" );
-   str2. setString( "xxxstring2", 3 );
-   str3. setString( "string3xxx", 0, 3 );
-   str4. setString( "xxxstring4xxx", 3, 3 );
+   str1.setString( "string1" );
+   str2.setString( "xxxstring2", 3 );
+   str3.setString( "string3xxx", 0, 3 );
+   str4.setString( "xxxstring4xxx", 3, 3 );
-   ASSERT_EQ( strcmp( str1. getString(), "string1" ), 0 );
-   ASSERT_EQ( strcmp( str2. getString(), "string2" ), 0 );
-   ASSERT_EQ( strcmp( str3. getString(), "string3" ), 0 );
-   ASSERT_EQ( strcmp( str4. getString(), "string4" ), 0 );
+   EXPECT_EQ( strcmp( str1.getString(), "string1" ), 0 );
+   EXPECT_EQ( strcmp( str2.getString(), "string2" ), 0 );
+   EXPECT_EQ( strcmp( str3.getString(), "string3" ), 0 );
+   EXPECT_EQ( strcmp( str4.getString(), "string4" ), 0 );
 TEST( StringTest, IndexingOperator )
    String str( "1234567890" );
-   ASSERT_EQ( str[ 0 ], '1' );
-   ASSERT_EQ( str[ 1 ], '2' );
-   ASSERT_EQ( str[ 2 ], '3' );
-   ASSERT_EQ( str[ 3 ], '4' );
-   ASSERT_EQ( str[ 4 ], '5' );
-   ASSERT_EQ( str[ 5 ], '6' );
-   ASSERT_EQ( str[ 6 ], '7' );
-   ASSERT_EQ( str[ 7 ], '8' );
-   ASSERT_EQ( str[ 8 ], '9' );
-   ASSERT_EQ( str[ 9 ], '0' );
+   EXPECT_EQ( str[ 0 ], '1' );
+   EXPECT_EQ( str[ 1 ], '2' );
+   EXPECT_EQ( str[ 2 ], '3' );
+   EXPECT_EQ( str[ 3 ], '4' );
+   EXPECT_EQ( str[ 4 ], '5' );
+   EXPECT_EQ( str[ 5 ], '6' );
+   EXPECT_EQ( str[ 6 ], '7' );
+   EXPECT_EQ( str[ 7 ], '8' );
+   EXPECT_EQ( str[ 8 ], '9' );
+   EXPECT_EQ( str[ 9 ], '0' );
-TEST( StringTest, AssignmentOperator )
+TEST( StringTest, CStringOperators )
-   String string1( "string" );
-   String string2;
-   string2 = string1;
+   // assignment operator
+   String string1;
+   string1 = "string";
+   EXPECT_EQ( strcmp( string1.getString(), "string" ), 0 );
+   // addition
+   string1 += "string2";
+   EXPECT_EQ( strcmp( string1.getString(), "stringstring2" ), 0 );
+   // addition that forces a new page allocation
+   string1 += " long long long long long long long long long long long long long long"
+              " long long long long long long long long long long long long long long"
+              " long long long long long long long long long long long long long long"
+              " long long long long long long long long long long long long long long";
+   EXPECT_EQ( strcmp( string1.getString(),
+              "stringstring2"
+              " long long long long long long long long long long long long long long"
+              " long long long long long long long long long long long long long long"
+              " long long long long long long long long long long long long long long"
+              " long long long long long long long long long long long long long long" ),
+            0 );
-   ASSERT_EQ( strcmp( string2. getString(), "string" ), 0 );
+   // addition
+   EXPECT_EQ( strcmp( (String( "foo " ) + "bar").getString(), "foo bar" ), 0 );
+   EXPECT_EQ( strcmp( ("foo" + String( " bar" )).getString(), "foo bar" ), 0 );
+   // comparison
+   EXPECT_EQ( String( "foo" ), "foo" );
+   EXPECT_NE( String( "bar" ), "foo" );
+   EXPECT_NE( String( "fooo" ), "foo" );
-TEST( StringTest, AdditionAssignmentOperator )
+TEST( StringTest, StringOperators )
+   // assignment
    String string1( "string" );
    String string2;
    string2 = string1;
-   string2 += "string2";
+   EXPECT_EQ( strcmp( string2.getString(), "string" ), 0 );
+   // addition
+   string1.setString( "foo " );
+   string1 += String( "bar" );
+   EXPECT_EQ( strcmp( string1.getString(), "foo bar" ), 0 );
+   // comparison
+   EXPECT_EQ( String( "foo bar" ), string1 );
+   EXPECT_NE( String( "bar" ), string1 );
+   EXPECT_NE( String( "bar" ), String( "baz" ) );
+   EXPECT_NE( String( "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long long long long long" ),
+              String( "short" ) );
+   String string3( "long long long long long long long long long long long "
+                   "long long long long long long long long long long long "
+                   "long long long long long long long long long long long "
+                   "long long long long long long long long long long long "
+                   "long long long long long long long long long long long "
+                   "long long long long long long long long long long long" );
+   string3[ 255 ] = 0;
+   EXPECT_EQ( string3,
+              String( "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long long long long long "
+                      "long long long long long long long " ) );
+   // addition
+   EXPECT_EQ( String( "foo " ) + String( "bar" ), "foo bar" );
+TEST( StringTest, SingleCharacterOperators )
+   // assignment
+   String string1;
+   string1 = 'A';
+   EXPECT_EQ( strcmp( string1.getString(), "A" ), 0 );
+   // addition of a single character
+   String string2( "string " );
+   string2 += 'A';
+   EXPECT_EQ( strcmp( string2.getString(), "string A" ), 0 );
-   ASSERT_EQ( strcmp( string2. getString(), "stringstring2" ), 0 );
+   // addition of a single character that causes new page allocation
+   string2.setString( "long long long long long long long long long long long long long "
+                      "long long long long long long long long long long long long long "
+                      "long long long long long long long long long long long long long "
+                      "long long long long long long long long long long long long " );
+   ASSERT_EQ( string2.getLength(), 255 );
+   string2 += 'B';
+   EXPECT_EQ( strcmp( string2.getString(),
+                  "long long long long long long long long long long long long long "
+                  "long long long long long long long long long long long long long "
+                  "long long long long long long long long long long long long long "
+                  "long long long long long long long long long long long long B" ),
+              0 );
+   // addition
+   EXPECT_EQ( strcmp( (String( "A " ) + 'B').getString(), "A B" ), 0 );
+   EXPECT_EQ( strcmp( ('A' + String( " B" )).getString(), "A B" ), 0 );
+   // comparison
+   EXPECT_EQ( String( "A" ), 'A' );
+   EXPECT_NE( String( "B" ), 'A' );
+   EXPECT_NE( String( "AB" ), 'A' );
+TEST( StringTest, CastToBoolOperator )
+   String string;
+   EXPECT_TRUE( ! string );
+   EXPECT_FALSE( string );
+   string.setString( "foo" );
+   EXPECT_TRUE( string );
+   EXPECT_FALSE( ! string );
+TEST( StringTest, replace )
+   EXPECT_EQ( String( "string" ).replace( "ing", "bc" ), "strbc" );
+   EXPECT_EQ( String( "abracadabra" ).replace( "ab", "CAT" ), "CATracadCATra" );
+   EXPECT_EQ( String( "abracadabra" ).replace( "ab", "CAT", 1 ), "CATracadabra" );
 TEST( StringTest, strip )
-   EXPECT_EQ( String( "string" ).strip(), String( "string" ) );
-   EXPECT_EQ( String( "  string" ).strip(), String( "string" ) );
-   EXPECT_EQ( String( "string  " ).strip(), String( "string" ) );
-   EXPECT_EQ( String( "  string  " ).strip(), String( "string" ) );
-   EXPECT_EQ( String( " string1  string2  " ).strip(), String( "string1  string2" ) );
-   EXPECT_EQ( String( "" ).strip(), String( "" ) );
-   EXPECT_EQ( String( "  " ).strip(), String( "" ) );
+   EXPECT_EQ( String( "string" ).strip(), "string" );
+   EXPECT_EQ( String( "  string" ).strip(), "string" );
+   EXPECT_EQ( String( "string  " ).strip(), "string" );
+   EXPECT_EQ( String( "  string  " ).strip(), "string" );
+   EXPECT_EQ( String( " string1  string2  " ).strip(), "string1  string2" );
+   EXPECT_EQ( String( "" ).strip(), "" );
+   EXPECT_EQ( String( "  " ).strip(), "" );
+TEST( StringTest, split )
+   Containers::List< String > list;
+   String( "A B C" ).split( list, ' ' );
+   ASSERT_EQ( list.getSize(), 3 );
+   EXPECT_EQ( list[ 0 ], "A" );
+   EXPECT_EQ( list[ 1 ], "B" );
+   EXPECT_EQ( list[ 2 ], "C" );
+   String( "abracadabra" ).split( list, 'a' );
+   ASSERT_EQ( list.getSize(), 4 );
+   EXPECT_EQ( list[ 0 ], "br" );
+   EXPECT_EQ( list[ 1 ], "c" );
+   EXPECT_EQ( list[ 2 ], "d" );
+   EXPECT_EQ( list[ 3 ], "br" );
+   String( "abracadabra" ).split( list, 'b' );
+   ASSERT_EQ( list.getSize(), 3 );
+   EXPECT_EQ( list[ 0 ], "a" );
+   EXPECT_EQ( list[ 1 ], "racada" );
+   EXPECT_EQ( list[ 2 ], "ra" );
+   String( "abracadabra" ).split( list, 'A' );
+   ASSERT_EQ( list.getSize(), 1 );
+   EXPECT_EQ( list[ 0 ], "abracadabra" );
 TEST( StringTest, SaveLoad )
    String str1( "testing-string" );
    File file;
-   file.open( "test-file.tnl", tnlWriteMode );
+   file.open( "test-file.tnl", IOMode::write );
    ASSERT_TRUE( str1.save( file ) );
-   file.open( "test-file.tnl", tnlReadMode );
+   file.open( "test-file.tnl", IOMode::read );
    String str2;
    ASSERT_TRUE( str2.load( file ) );
-   ASSERT_EQ( str1, str2 );
+   EXPECT_EQ( str1, str2 );
+   EXPECT_EQ( std::remove( "test-file.tnl" ), 0 );
+#include "GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/UnitTests/UniquePointerTest.cpp b/src/UnitTests/UniquePointerTest.cpp
index 579fd2569df3e9b6b68f7e21d426836a7a1abf12..677b3e2bb98a07508ec059b2e6a350375785c9b1 100644
--- a/src/UnitTests/UniquePointerTest.cpp
+++ b/src/UnitTests/UniquePointerTest.cpp
@@ -21,7 +21,7 @@
 #include <TNL/Containers/StaticArray.h>
 #ifdef HAVE_GTEST 
-#include "gtest/gtest.h"
+#include <gtest/gtest.h>
 using namespace TNL;
@@ -47,12 +47,13 @@ TEST( UniquePointerTest, ConstructorTest )
+#include "GtestMissingError.h"
 int main( int argc, char* argv[] )
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-   return EXIT_FAILURE;
+   throw GtestMissingError();
diff --git a/src/core/tnlTypeInfo.h b/src/core/tnlTypeInfo.h
deleted file mode 100644
index 5d5282f3c4ed13bcc190446f7c1026ac28663944..0000000000000000000000000000000000000000
--- a/src/core/tnlTypeInfo.h
+++ /dev/null
@@ -1,39 +0,0 @@
- * File:   tnlTypeInfo.h
- * Author: oberhuber
- *
- * Created on July 14, 2016, 3:46 PM
- */
-#pragma once
-#include <limits>
-template< typename Type >
-class tnlTypeInfo
-class tnlTypeInfo< double >
-   public:
-      typedef double Type;
-      static __cuda_callable__
-      Type getMaxValue() { return DBL_MAX; };
-class tnlTypeInfo< float >
-   public:
-      typedef float Type;
-      static __cuda_callable__
-      Type getMaxValue() { return FLT_MAX; };
diff --git a/src/functions/tnlFunctions.h b/src/functions/tnlFunctions.h
deleted file mode 100644
index 326fa6024b37256360fb6653d195310fe6eb9a98..0000000000000000000000000000000000000000
--- a/src/functions/tnlFunctions.h
+++ /dev/null
@@ -1,52 +0,0 @@
- * File:   tnlFunctions.h
- * Author: oberhuber
- *
- * Created on July 11, 2016, 6:01 PM
- */
-#pragma once
-#include <core/tnlCuda.h>
-template< typename Real >
-Real sign( const Real& x, const Real& smoothing = 0.0 )
-   if( x > smoothing )
-      return 1.0;
-   else if( x < -smoothing )
-      return -1.0;
-   if( smoothing == 0.0 )
-      return 0.0;
-   return sin( ( M_PI * x ) / ( 2.0 * smoothing ) );
-template< typename Real >
-Real positivePart( const Real& arg)
-   return arg > 0.0 ? arg : 0.0;
-template< typename Real >
-Real negativePart( const Real& arg)
-   return arg < 0.0 ? arg : 0.0;
-template< typename Real >
-Real ArgAbsMin( const Real& x, const Real& y )
-   return fabs( x ) < fabs( y ) ?  x : y;
-template< typename Real >
-Real ArgAbsMax( const Real& x, const Real& y )
-   return fabs( x ) > fabs( y ) ?  x : y;
diff --git a/src/operators/tnlDirichletBoundaryConditions_impl.h b/src/operators/tnlDirichletBoundaryConditions_impl.h
deleted file mode 100644
index 0fed06fc59345dec8ddd799007d8eed3cb9ba8dd..0000000000000000000000000000000000000000
--- a/src/operators/tnlDirichletBoundaryConditions_impl.h
+++ /dev/null
@@ -1,144 +0,0 @@
-                          tnlDirichletBoundaryConditions_impl.h  -  description
-                             -------------------
-    begin                : Nov 17, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#include <functions/tnlFunctionAdapter.h>
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-configSetup( tnlConfigDescription& config,
-             const String& prefix )
-   Function::configSetup( config );
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-setup( const Config::ParameterContainer& parameters,
-       const String& prefix )
-   return this->function.setup( parameters );
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-setFunction( const Function& function )
-   this->function = function;
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-   return this->function;
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-const Function&
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-getFunction() const
-   return *this->function;
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-template< typename EntityType,
-          typename MeshFunction >
-const Real
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-operator()( const MeshFunction& u,
-            const EntityType& entity,            
-            const RealType& time ) const
-   //static_assert( EntityType::getDimensions() == MeshEntitiesDimension, "Wrong mesh entity dimensions." );
-   return tnlFunctionAdapter< MeshType, Function >::template getValue( this->function, entity, time );
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-   template< typename EntityType >
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-getLinearSystemRowLength( const MeshType& mesh,
-                          const IndexType& index,
-                          const EntityType& entity ) const
-   return 1;
-template< typename Mesh,
-          typename Function,
-          int MeshEntitiesDimension,
-          typename Real,
-          typename Index >
-   template< typename Matrix,
-             typename EntityType,
-             typename MeshFunction >
-tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimension, Real, Index >::
-updateLinearSystem( const RealType& time,
-                    const MeshType& mesh,
-                    const IndexType& index,
-                    const EntityType& entity,
-                    const MeshFunction& u,
-                    DofVectorType& b,
-                    Matrix& matrix ) const
-   typename Matrix::MatrixRow matrixRow = matrix.getRow( index );
-   matrixRow.setElement( 0, index, 1.0 );
-   b[ index ] = tnlFunctionAdapter< MeshType, Function >::getValue( this->function, entity, time );
diff --git a/src/operators/tnlNeumannReflectionBoundaryConditions.h b/src/operators/tnlNeumannReflectionBoundaryConditions.h
deleted file mode 100644
index 914d50de797f66c5323be7c0b261b193ef3ede71..0000000000000000000000000000000000000000
--- a/src/operators/tnlNeumannReflectionBoundaryConditions.h
+++ /dev/null
@@ -1,126 +0,0 @@
-#pragma once
-#include <TNL/Containers/StaticVector.h>
-#include <core/vectors/tnlSharedVector.h>
-#include <TNL/Config/ParameterContainer.h>
-#include <functions/tnlConstantFunction.h>
-template< typename Mesh,
-          typename Real = typename Mesh::RealType,
-          typename Index = typename Mesh::IndexType >
-class tnlNeumannReflectionBoundaryConditions
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNeumannReflectionBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Real, Index >
-   public:
-   typedef tnlGrid< 1, MeshReal, Device, MeshIndex > MeshType;
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef tnlSharedVector< RealType, DeviceType, IndexType > SharedVector;
-   typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
-   typedef Containers::StaticVector< 1, RealType > PointType;
-   typedef typename MeshType::CoordinatesType CoordinatesType;
-   bool setup( const Config::ParameterContainer& parameters,
-              const String& prefix = "" );
-   template< typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-   const RealType operator()( const MeshFunction& u,
-                              const EntityType& entity,   
-                              const RealType& time = 0 ) const;
-   CoordinatesType tmp;
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNeumannReflectionBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >
-   public:
-   typedef tnlGrid< 2, MeshReal, Device, MeshIndex > MeshType;
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef tnlSharedVector< RealType, DeviceType, IndexType > SharedVector;
-   typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
-   typedef Containers::StaticVector< 2, RealType > PointType;
-   typedef typename MeshType::CoordinatesType CoordinatesType;
-   bool setup( const Config::ParameterContainer& parameters,
-              const String& prefix = "" );
-   template< typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-   const RealType operator()( const MeshFunction& u,
-                              const EntityType& entity,   
-                              const RealType& time = 0 ) const;
-   CoordinatesType tmp;
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNeumannReflectionBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >
-   public:
-   typedef tnlGrid< 3, MeshReal, Device, MeshIndex > MeshType;
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef tnlSharedVector< RealType, DeviceType, IndexType > SharedVector;
-   typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
-   typedef Containers::StaticVector< 3, RealType > PointType;
-   typedef typename MeshType::CoordinatesType CoordinatesType;
-   bool setup( const Config::ParameterContainer& parameters,
-              const String& prefix = "" );
-   template< typename EntityType,
-             typename MeshFunction >
-   __cuda_callable__
-   const RealType operator()( const MeshFunction& u,
-                              const EntityType& entity,   
-                              const RealType& time = 0 ) const;
-   private:
-   CoordinatesType tmp;
-#include <operators/tnlNeumannReflectionBoundaryConditions_impl.h>
diff --git a/src/operators/tnlNeumannReflectionBoundaryConditions_impl.h b/src/operators/tnlNeumannReflectionBoundaryConditions_impl.h
deleted file mode 100644
index d791383520bd267beae784fe640d012ddd3147e0..0000000000000000000000000000000000000000
--- a/src/operators/tnlNeumannReflectionBoundaryConditions_impl.h
+++ /dev/null
@@ -1,171 +0,0 @@
-#pragma once
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNeumannReflectionBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Real, Index >::
-setup( const Config::ParameterContainer& parameters,
-      const String& prefix )
-   return true;
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-   template< typename EntityType,
-             typename MeshFunction >
-const Real
-tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Function, Real, Index >::
-operator()( const MeshFunction& u,
-            const EntityType& entity,
-            const RealType& time ) const
-#ifdef HAVE_CUDA
-   __device__ __host__
-void tnlNeumannReflectionBoundaryConditions< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index >::
-setBoundaryConditions( const RealType& time,
-                       const MeshType& mesh,
-                       const IndexType index,
-                       const CoordinatesType& coordinates,
-                       DofVectorType& u,
-                       DofVectorType& fu )
-	tmp = coordinates;
-   if(coordinates.x() == 0)
-	   tmp.x() = 1;
-   else if(coordinates.x() == mesh. getDimensions().x() - 1)
-	   tmp.x() = coordinates.x() - 2;
-   u[ index ] = u[mesh.getCellIndex( tmp )];
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNeumannReflectionBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-setup( const Config::ParameterContainer& parameters,
-      const String& prefix )
-   return true;
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-   template< typename EntityType,
-             typename MeshFunction >
-const Real
-tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Function, Real, Index >::
-operator()( const MeshFunction& u,
-            const EntityType& entity,
-            const RealType& time ) const
-#ifdef HAVE_CUDA
-   __device__ __host__
-void tnlNeumannReflectionBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >::
-setBoundaryConditions( const RealType& time,
-                       const MeshType& mesh,
-                       const IndexType index,
-                       const CoordinatesType& coordinates,
-                       DofVectorType& u,
-                       DofVectorType& fu )
-	tmp = coordinates;
-   if(coordinates.x() == 0)
-	   tmp.x() = coordinates.x() + 2;
-   else if(coordinates.x() == mesh. getDimensions().x() - 1)
-	   tmp.x() = coordinates.x() - 2;
-   if(coordinates.y() == 0)
-	   tmp.y() = coordinates.y() + 2;
-   else if(coordinates.y() == mesh. getDimensions().y() - 1)
-	   tmp.y() = coordinates.y() - 2;
-   u[ index ] = u[mesh.getCellIndex( tmp )];
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNeumannReflectionBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-setup( const Config::ParameterContainer& parameters,
-      const String& prefix )
-   return true;
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-   template< typename EntityType,
-             typename MeshFunction >
-const Real
-tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Function, Real, Index >::
-operator()( const MeshFunction& u,
-            const EntityType& entity,
-            const RealType& time ) const
-#ifdef HAVE_CUDA
-   __device__ __host__
-void tnlNeumannReflectionBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >::
-setBoundaryConditions( const RealType& time,
-                       const MeshType& mesh,
-                       const IndexType index,
-                       const CoordinatesType& coordinates,
-                       DofVectorType& u,
-                       DofVectorType& fu )
-	tmp = coordinates;
-   if(coordinates.x() == 0)
-	   tmp.x() = coordinates.x() + 2;
-   else if(coordinates.x() == mesh. getDimensions().x() - 1)
-	   tmp.x() = coordinates.x() - 2;
-   if(coordinates.y() == 0)
-	   tmp.y() = coordinates.y() + 2;
-   else if(coordinates.y() == mesh. getDimensions().y() - 1)
-	   tmp.y() = coordinates.y() - 2;
-   if(coordinates.z() == 0)
-	   tmp.z() = coordinates.z() + 2;
-   else if(coordinates.z() == mesh. getDimensions().z() - 1)
-	   tmp.z() = coordinates.z() - 2;
-   u[ index ] = u[mesh.getCellIndex( tmp )];
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
old mode 100755
new mode 100644
index d813a07bae2fa2484ed5c853f3eb2e8a9d6153d5..21090988a1322a52fdb10006ef413e95db0504a7
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -4,6 +4,7 @@ ADD_SUBDIRECTORY( data )
 ADD_SUBDIRECTORY( benchmarks )
 #ADD_SUBDIRECTORY( unit-tests )
 ADD_SUBDIRECTORY( long-time-unit-tests )
\ No newline at end of file
diff --git a/tests/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/benchmarks/array-operations.h b/tests/benchmarks/array-operations.h
index a7c1513e47f122adbc7fd334c2535b4d9cb75777..504dcc1da03a91fa87af913008f9355579e62930 100644
--- a/tests/benchmarks/array-operations.h
+++ b/tests/benchmarks/array-operations.h
@@ -36,21 +36,12 @@ benchmarkArrayOperations( Benchmark & benchmark,
     HostArray hostArray, hostArray2;
     CudaArray deviceArray, deviceArray2;
-    if( ! hostArray.setSize( size ) ||
-        ! hostArray2.setSize( size )
+    hostArray.setSize( size );
+    hostArray2.setSize( size );
 #ifdef HAVE_CUDA
-        ||
-        ! deviceArray.setSize( size ) ||
-        ! deviceArray2.setSize( size )
+    deviceArray.setSize( size );
+    deviceArray2.setSize( size );
-    )
-    {
-        const char* msg = "error: allocation of arrays failed";
-        std::cerr << msg << std::endl;
-        benchmark.addErrorMessage( msg );
-        return false;
-    }
     Real resultHost, resultDevice;
diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
index 8c67951c9093d670f310753306e6eea04a9a4858..4e260b0f711851d114f29122e8c02ba299ffcb72 100644
--- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
+++ b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h
@@ -37,14 +37,14 @@ operator()( const MeshFunction& u,
     * The following example is the Laplace operator approximated 
     * by the Finite difference method.
-    static_assert( MeshEntity::entityDimension == 1, "Wrong mesh entity dimension." ); 
+    static_assert( MeshEntity::getEntityDimension() == 1, "Wrong mesh entity dimension." ); 
     static_assert( MeshFunction::getEntitiesDimension() == 1, "Wrong preimage function" ); 
-    const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+    const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-   const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); 
+   const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+   const IndexType& west = neighborEntities.template getEntityIndex< -1 >(); 
    return ( u[ west ] - 2.0 * u[ center ]  + u[ east ] ) * hxSquareInverse;
@@ -95,11 +95,11 @@ setMatrixElements( const RealType& time,
     * by the Finite difference method.
-    const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); 
+    const typename MeshEntity::template NeighborEntities< 1 >& neighborEntities = entity.getNeighborEntities(); 
    const RealType& lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); 
-   const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); 
+   const IndexType& east = neighborEntities.template getEntityIndex< 1 >(); 
+   const IndexType& west = neighborEntities.template getEntityIndex< -1 >(); 
    matrixRow.setElement( 0, west,   - lambdaX );
    matrixRow.setElement( 1, center, 2.0 * lambdaX );
    matrixRow.setElement( 2, east,   - lambdaX );
@@ -141,17 +141,17 @@ operator()( const MeshFunction& u,
     * The following example is the Laplace operator approximated 
     * by the Finite difference method.
-   /*static_assert( MeshEntity::entityDimension == 2, "Wrong mesh entity dimension." ); 
+   /*static_assert( MeshEntity::getEntityDimension() == 2, "Wrong mesh entity dimension." ); 
    static_assert( MeshFunction::getEntitiesDimension() == 2, "Wrong preimage function" ); 
-   const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+   const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); 
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >(); */
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >(); */
    const IndexType& xSize = entity.getMesh().getDimensions().x();
    const IndexType& c = entity.getIndex();
@@ -208,14 +208,14 @@ setMatrixElements( const RealType& time,
     * by the Finite difference method.
-    const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); 
+    const typename MeshEntity::template NeighborEntities< 2 >& neighborEntities = entity.getNeighborEntities(); 
    const RealType& lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); 
    const RealType& lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1 >(); 
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1 >(); 
    matrixRow.setElement( 0, south,  -lambdaY );
    matrixRow.setElement( 1, west,   -lambdaX );
    matrixRow.setElement( 2, center, 2.0 * ( lambdaX + lambdaY ) );
@@ -259,20 +259,20 @@ operator()( const MeshFunction& u,
     * The following example is the Laplace operator approximated 
     * by the Finite difference method.
-    static_assert( MeshEntity::entityDimension == 3, "Wrong mesh entity dimension." ); 
+    static_assert( MeshEntity::getEntityDimension() == 3, "Wrong mesh entity dimension." ); 
     static_assert( MeshFunction::getEntitiesDimension() == 3, "Wrong preimage function" ); 
-    const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+    const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
    const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >(); 
    const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >(); 
    const RealType& hzSquareInverse = entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >(); 
-   const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-   const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >(); 
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >(); 
+   const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+   const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >(); 
    return ( u[ west ] - 2.0 * u[ center ] + u[ east ]  ) * hxSquareInverse +
           ( u[ south ] - 2.0 * u[ center ] + u[ north ] ) * hySquareInverse +
           ( u[ up ] - 2.0 * u[ center ] + u[ down ] ) * hzSquareInverse;
@@ -325,17 +325,17 @@ setMatrixElements( const RealType& time,
     * by the Finite difference method.
-    const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); 
+    const typename MeshEntity::template NeighborEntities< 3 >& neighborEntities = entity.getNeighborEntities(); 
    const RealType& lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2,  0,  0 >(); 
    const RealType& lambdaY = tau * entity.getMesh().template getSpaceStepsProducts<  0, -2,  0 >(); 
    const RealType& lambdaZ = tau * entity.getMesh().template getSpaceStepsProducts<  0,  0, -2 >(); 
    const IndexType& center = entity.getIndex(); 
-   const IndexType& east  = neighbourEntities.template getEntityIndex<  1,  0,  0 >(); 
-   const IndexType& west  = neighbourEntities.template getEntityIndex< -1,  0,  0 >(); 
-   const IndexType& north = neighbourEntities.template getEntityIndex<  0,  1,  0 >(); 
-   const IndexType& south = neighbourEntities.template getEntityIndex<  0, -1,  0 >(); 
-   const IndexType& up    = neighbourEntities.template getEntityIndex<  0,  0,  1 >(); 
-   const IndexType& down  = neighbourEntities.template getEntityIndex<  0,  0, -1 >(); 
+   const IndexType& east  = neighborEntities.template getEntityIndex<  1,  0,  0 >(); 
+   const IndexType& west  = neighborEntities.template getEntityIndex< -1,  0,  0 >(); 
+   const IndexType& north = neighborEntities.template getEntityIndex<  0,  1,  0 >(); 
+   const IndexType& south = neighborEntities.template getEntityIndex<  0, -1,  0 >(); 
+   const IndexType& up    = neighborEntities.template getEntityIndex<  0,  0,  1 >(); 
+   const IndexType& down  = neighborEntities.template getEntityIndex<  0,  0, -1 >(); 
    matrixRow.setElement( 0, down,   -lambdaZ );
    matrixRow.setElement( 1, south,  -lambdaY );
    matrixRow.setElement( 2, west,   -lambdaX );
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
index 07e78a73abb8daf9794d49d293afba0e6b59deb6..573eab7885f83c8ffb2f26a6a4438b3129258099 100644
--- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
+++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h
@@ -459,7 +459,7 @@ getExplicitUpdate( const RealType& time,
       if( this->cudaKernelType == "templated-compact" )
-         typedef typename MeshType::MeshEntity< 2 > CellType;
+         typedef typename MeshType::EntityType< 2 > CellType;
          //typedef typename MeshType::Cell CellType;
          //std::cerr << "Size of entity is ... " << sizeof( TestEntity< MeshType > ) << " vs. " << sizeof( CellType ) << std::endl;
          typedef typename CellType::CoordinatesType CoordinatesType;
@@ -494,7 +494,7 @@ getExplicitUpdate( const RealType& time,
                     gridYIdx );
-         checkCudaDevice;
          //std::cerr << "Computing the heat equation ..." << std::endl;
          for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ )
@@ -514,7 +514,7 @@ getExplicitUpdate( const RealType& time,
                     gridYIdx );
-         checkCudaDevice;
       if( this->cudaKernelType == "templated" )
diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
index 9ba4c06c944e0906399fe3079dc98e2fc2ddbc3f..dd4f33bc2225fe51796654b2b473b62fe4ea4e06 100644
--- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
+++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h
@@ -2,7 +2,7 @@
 #define HeatEquationBenchmarkRHS_H_
 #include <TNL/Functions/Domain.h>
 template< typename Mesh, typename Real >class HeatEquationBenchmarkRhs
-  : public Functions::Domain< Mesh::meshDimension, Functions::MeshDomain > 
+  : public Functions::Domain< Mesh::getMeshDimension(), Functions::MeshDomain > 
diff --git a/tests/benchmarks/heat-equation-benchmark/TestGridEntity.h b/tests/benchmarks/heat-equation-benchmark/TestGridEntity.h
index afc7e5e6f7931a2aaf1b6d3cb40f378ed600f21a..3492b219807f4650ed665b2ee57c77754f5934f1 100644
--- a/tests/benchmarks/heat-equation-benchmark/TestGridEntity.h
+++ b/tests/benchmarks/heat-equation-benchmark/TestGridEntity.h
@@ -18,12 +18,12 @@
 #pragma once 
 template< typename GridEntity >
-class TestNeighbourGridEntitiesStorage
+class TestNeighborGridEntitiesStorage
-      TestNeighbourGridEntitiesStorage( const GridEntity& entity )
+      TestNeighborGridEntitiesStorage( const GridEntity& entity )
       : entity( entity )
@@ -79,13 +79,13 @@ class TestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension
       typedef Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
       typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
       typedef TestGridEntity< GridType, entityDimension > ThisType;
-      typedef TestNeighbourGridEntitiesStorage< ThisType > NeighbourGridEntitiesStorageType;
+      typedef TestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType;
       __cuda_callable__ inline
       TestGridEntity( const GridType& grid )
       : grid( grid ),
         /*entityIndex( -1 ),*/
-        neighbourEntitiesStorage( *this )
+        neighborEntitiesStorage( *this )
@@ -98,7 +98,7 @@ class TestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension
       : grid( grid ),
         /*entityIndex( -1 ),
         coordinates( coordinates ),*/
-        neighbourEntitiesStorage( *this )
+        neighborEntitiesStorage( *this )
@@ -116,7 +116,7 @@ class TestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension
       EntityBasisType basis;
-      NeighbourGridEntitiesStorageType neighbourEntitiesStorage;
+      NeighborGridEntitiesStorageType neighborEntitiesStorage;
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestGrid2D.h b/tests/benchmarks/heat-equation-benchmark/tnlTestGrid2D.h
index d54da5db86516dee835a23ead5e672b9e52253f4..c10cec6ee9fa2a2f5762bcac0c80607c2a3d8326 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnlTestGrid2D.h
+++ b/tests/benchmarks/heat-equation-benchmark/tnlTestGrid2D.h
@@ -15,7 +15,7 @@
 #include <core/tnlObject.h>
 #include <core/Devices::Host.h>
 #include <TNL/Containers/StaticVector.h>
-#include <core/vectors/tnlVector.h>
+#include <TNL/Containers/Vector.h>
 template< int Dimension,
           typename Real = double,
@@ -32,12 +32,12 @@ class Grid : public tnlObject
 #include <mesh/grids/GridEntityTopology.h>
 #include <mesh/grids/GridEntityGetter.h>
 #include <mesh/grids/GridEntityConfig.h>
-#include <mesh/grids/NeighbourGridEntityGetter.h>
+#include <mesh/grids/NeighborGridEntityGetter.h>
 #include <core/tnlLogger.h>
 // TODO: remove this
 //#include <../tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h>
-//#include <../tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h>
+//#include <../tests/benchmarks/heat-equation-benchmark/tnlTestNeighborGridEntityGetter2D_impl.h>
 template< typename Real,
@@ -192,7 +192,7 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject
 #include <core/tnlTNL_ASSERT.h>
 #include <mesh/GnuplotWriter.h>
 #include <mesh/grids/GridEntityGetter_impl.h>
-#include <mesh/grids/NeighbourGridEntityGetter2D_impl.h>
+#include <mesh/grids/NeighborGridEntityGetter2D_impl.h>
 #include <mesh/grids/GridEntityMeasureGetter.h>
 using namespace std;
@@ -214,11 +214,11 @@ template< typename Real,
           typename Index >
 String Meshes::Grid< 2, Real, Device, Index > :: getType()
-   return tnlString( "Meshes::Grid< " ) +
-          tnlString( getMeshDimension() ) + ", " +
-          tnlString( ::getType< RealType >() ) + ", " +
-          tnlString( Device :: getDeviceType() ) + ", " +
-          tnlString( ::getType< IndexType >() ) + " >";
+   return TNL::String( "Meshes::Grid< " ) +
+          TNL::String( getMeshDimension() ) + ", " +
+          TNL::String( ::getType< RealType >() ) + ", " +
+          TNL::String( Device :: getDeviceType() ) + ", " +
+          TNL::String( ::getType< IndexType >() ) + " >";
 template< typename Real,
@@ -979,11 +979,11 @@ template< typename Real,
           typename Index >
 String Meshes::Grid< 2, Real, Device, Index > :: getType()
-   return tnlString( "Meshes::Grid< " ) +
-          tnlString( getMeshDimension() ) + ", " +
-          tnlString( ::getType< RealType >() ) + ", " +
-          tnlString( Device :: getDeviceType() ) + ", " +
-          tnlString( ::getType< IndexType >() ) + " >";
+   return TNL::String( "Meshes::Grid< " ) +
+          TNL::String( getMeshDimension() ) + ", " +
+          TNL::String( ::getType< RealType >() ) + ", " +
+          TNL::String( Device :: getDeviceType() ) + ", " +
+          TNL::String( ::getType< IndexType >() ) + " >";
 template< typename Real,
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h b/tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h
index d7841576e1e55be8d28c692ac39b1b6fbb7ffa47..aa8bd8d057309b1cd48fe38d71eab7886ccc0d7c 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h
+++ b/tests/benchmarks/heat-equation-benchmark/tnlTestGridEntity.h
@@ -53,25 +53,25 @@ class tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimensi
       constexpr static int getDimension() { return meshDimension; };
-      typedef tnlStaticVector< meshDimension, IndexType > EntityOrientationType;
-      typedef tnlStaticVector< meshDimension, IndexType > EntityBasisType;
+      typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityOrientationType;
+      typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityBasisType;
       typedef tnlTestGridEntity< GridType, entityDimension, Config > ThisType;
-      //typedef tnlTestNeighbourGridEntitiesStorage< ThisType > NeighbourGridEntitiesStorageType;
+      //typedef tnlTestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType;
-      /*template< int NeighbourEntityDimension = entityDimension >
-      using NeighbourEntities = 
-         tnlTestNeighbourGridEntityGetter<
+      /*template< int NeighborEntityDimension = entityDimension >
+      using NeighborEntities = 
+         tnlTestNeighborGridEntityGetter<
             tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >,
                            Config >,
-            NeighbourEntityDimension >;*/
+            NeighborEntityDimension >;*/
       __cuda_callable__ inline
       tnlTestGridEntity( const GridType& grid )
       : grid( grid ),
         entityIndex( -1 )/*,
-        neighbourEntitiesStorage( *this )*/
+        neighborEntitiesStorage( *this )*/
          this->coordinates = CoordinatesType( ( Index ) 0 );
          this->orientation = EntityOrientationType( ( Index ) 0 );
@@ -87,7 +87,7 @@ class tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimensi
       : grid( grid ),
         entityIndex( -1 ),
         coordinates( coordinates )/*,
-        neighbourEntitiesStorage( *this )*/
+        neighborEntitiesStorage( *this )*/
          this->orientation = EntityOrientationType( ( Index ) 0 );
          this->basis = EntityBasisType( ( Index ) 1 );
@@ -107,7 +107,7 @@ class tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimensi
       EntityBasisType basis;
-      //NeighbourGridEntitiesStorageType neighbourEntitiesStorage;
+      //NeighborGridEntitiesStorageType neighborEntitiesStorage;
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntitiesStorage.h b/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntitiesStorage.h
index 4d58008765aa4c8e2f613caa6417590248eddc76..f066ef3e3d2ea95531ddbdcbf27714d2f551875c 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntitiesStorage.h
+++ b/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntitiesStorage.h
@@ -1,5 +1,5 @@
-                          tnlTestNeighbourGridEntitiesStorage.h  -  description
+                          tnlTestNeighborGridEntitiesStorage.h  -  description
     begin                : Dec 18, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -20,23 +20,23 @@
 #include <core/tnlCuda.h>
 #include <mesh/MeshDimensionTag.h>
-#include "tnlTestNeighbourGridEntityGetter.h"
+#include "tnlTestNeighborGridEntityGetter.h"
 template< typename GridEntity,
-          int NeighbourEntityDimension >
-class tnlTestNeighbourGridEntityLayer 
-: public tnlTestNeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension - 1 >
+          int NeighborEntityDimension >
+class tnlTestNeighborGridEntityLayer 
+: public tnlTestNeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1 >
-      typedef tnlTestNeighbourGridEntityLayer< GridEntity, NeighbourEntityDimension - 1 > BaseType;
-      typedef tnlTestNeighbourGridEntityGetter< GridEntity, NeighbourEntityDimension > NeighbourEntityGetterType;
+      typedef tnlTestNeighborGridEntityLayer< GridEntity, NeighborEntityDimension - 1 > BaseType;
+      typedef tnlTestNeighborGridEntityGetter< GridEntity, NeighborEntityDimension > NeighborEntityGetterType;
-      using BaseType::getNeighbourEntities;
+      using BaseType::getNeighborEntities;
-      tnlTestNeighbourGridEntityLayer( const GridEntity& entity )
-      : neighbourEntities( entity ),
+      tnlTestNeighborGridEntityLayer( const GridEntity& entity )
+      : neighborEntities( entity ),
         BaseType( entity ) 
@@ -45,51 +45,51 @@ class tnlTestNeighbourGridEntityLayer
                     const typename GridEntity::GridType::IndexType& entityIndex )
          BaseType::refresh( grid, entityIndex );
-         neighbourEntities.refresh( grid, entityIndex );
+         neighborEntities.refresh( grid, entityIndex );
-      NeighbourEntityGetterType neighbourEntities;
+      NeighborEntityGetterType neighborEntities;
 template< typename GridEntity >
-class tnlTestNeighbourGridEntityLayer< GridEntity, 0 >
+class tnlTestNeighborGridEntityLayer< GridEntity, 0 >
-      typedef tnlTestNeighbourGridEntityGetter< GridEntity, 0 > NeighbourEntityGetterType;     
+      typedef tnlTestNeighborGridEntityGetter< GridEntity, 0 > NeighborEntityGetterType;     
-      tnlTestNeighbourGridEntityLayer( const GridEntity& entity )
-      : neighbourEntities( entity )
+      tnlTestNeighborGridEntityLayer( const GridEntity& entity )
+      : neighborEntities( entity )
       void refresh( const typename GridEntity::GridType& grid, 
                     const typename GridEntity::GridType::IndexType& entityIndex )
-         neighbourEntities.refresh( grid, entityIndex );
+         neighborEntities.refresh( grid, entityIndex );
-      NeighbourEntityGetterType neighbourEntities;
+      NeighborEntityGetterType neighborEntities;
 template< typename GridEntity >
-class tnlTestNeighbourGridEntitiesStorage
-: public tnlTestNeighbourGridEntityLayer< GridEntity, GridEntity::meshDimension >
+class tnlTestNeighborGridEntitiesStorage
+: public tnlTestNeighborGridEntityLayer< GridEntity, GridEntity::meshDimension >
-   typedef tnlTestNeighbourGridEntityLayer< GridEntity, GridEntity::meshDimension > BaseType;
+   typedef tnlTestNeighborGridEntityLayer< GridEntity, GridEntity::meshDimension > BaseType;
-      using BaseType::getNeighbourEntities;
+      using BaseType::getNeighborEntities;
-      tnlTestNeighbourGridEntitiesStorage( const GridEntity& entity )
+      tnlTestNeighborGridEntitiesStorage( const GridEntity& entity )
       : BaseType( entity )
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter.h b/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter.h
index 2aa6f25bb65be792da79252969b4583cf13269e7..521c58ff56902084a602e33bf131808e33d54862 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter.h
+++ b/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter.h
@@ -1,5 +1,5 @@
-                          tnlTestNeighbourGridEntityGetter.h  -  description
+                          tnlTestNeighborGridEntityGetter.h  -  description
     begin                : Nov 23, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -21,17 +21,17 @@
 template< typename GridEntity,
-          int NeighbourEntityDimension,
+          int NeighborEntityDimension,
           typename EntityStencilTag = 
-            GridEntityStencilStorageTag< GridEntity::ConfigType::template neighbourEntityStorage< GridEntity >( NeighbourEntityDimension ) > >
-class tnlTestNeighbourGridEntityGetter
+            GridEntityStencilStorageTag< GridEntity::ConfigType::template neighborEntityStorage< GridEntity >( NeighborEntityDimension ) > >
+class tnlTestNeighborGridEntityGetter
       // TODO: not all specializations are implemented yet
-      tnlTestNeighbourGridEntityGetter( const GridEntity& entity )
+      tnlTestNeighborGridEntityGetter( const GridEntity& entity )
          //tnlTNL_ASSERT( false, );
@@ -50,7 +50,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class tnlTestNeighbourGridEntityGetter< 
+class tnlTestNeighborGridEntityGetter< 
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    StencilStorage >
@@ -58,17 +58,17 @@ class tnlTestNeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetter;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetter;
       __cuda_callable__ inline
-      tnlTestNeighbourGridEntityGetter( const GridEntityType& entity )
+      tnlTestNeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
@@ -79,7 +79,7 @@ class tnlTestNeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //tnlTestNeighbourGridEntityGetter(){};      
+      //tnlTestNeighborGridEntityGetter(){};      
diff --git a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h b/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h
index cce9a7ec5a63111b1a0a06b6d4d0288c6de95312..23d35fdae1038c6ec82e65fd69febfb1b0c6779a 100644
--- a/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h
+++ b/tests/benchmarks/heat-equation-benchmark/tnlTestNeighbourGridEntityGetter2D_impl.h
@@ -1,5 +1,5 @@
-                          tnlTestNeighbourGridEntityGetter2D_impl.h  -  description
+                          tnlTestNeighborGridEntityGetter2D_impl.h  -  description
     begin                : Nov 23, 2015
     copyright            : (C) 2015 by Tomas Oberhuber
@@ -17,13 +17,13 @@
 #pragma once
-#include "tnlTestNeighbourGridEntityGetter.h"
+#include "tnlTestNeighborGridEntityGetter.h"
 #include <mesh/grids/Grid2D.h>
 #include <core/tnlStaticFor.h>
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       2         |              2            | No specialization |
  * +-----------------+---------------------------+-------------------+
@@ -33,7 +33,7 @@ template< typename Real,
           typename Index,
           typename Config,
           typename StencilStorage >
-class tnlTestNeighbourGridEntityGetter< 
+class tnlTestNeighborGridEntityGetter< 
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    StencilStorage >
@@ -41,17 +41,17 @@ class tnlTestNeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetter;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetter;
       __cuda_callable__ inline
-      tnlTestNeighbourGridEntityGetter( const GridEntityType& entity )
+      tnlTestNeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
@@ -62,12 +62,12 @@ class tnlTestNeighbourGridEntityGetter<
       const GridEntityType& entity;
-      //tnlTestNeighbourGridEntityGetter(){};      
+      //tnlTestNeighborGridEntityGetter(){};      
  * +-----------------+---------------------------+-------------------+
- * | EntityDimenions | NeighbourEntityDimension |  Stencil Storage  |
+ * | EntityDimenions | NeighborEntityDimension |  Stencil Storage  |
  * +-----------------+---------------------------+-------------------+
  * |       2         |              2            |       Cross       |
  * +-----------------+---------------------------+-------------------+
@@ -76,7 +76,7 @@ template< typename Real,
           typename Device,
           typename Index,
           typename Config >
-class tnlTestNeighbourGridEntityGetter< 
+class tnlTestNeighborGridEntityGetter< 
    GridEntity< Meshes::Grid< 2, Real, Device, Index >, 2, Config >,
    GridEntityStencilStorageTag< GridEntityCrossStencil > >
@@ -84,22 +84,22 @@ class tnlTestNeighbourGridEntityGetter<
       static const int EntityDimension = 2;
-      static const int NeighbourEntityDimension = 2;
+      static const int NeighborEntityDimension = 2;
       typedef Meshes::Grid< 2, Real, Device, Index > GridType;
       typedef GridEntity< GridType, EntityDimension, Config > GridEntityType;
-      typedef GridEntity< GridType, NeighbourEntityDimension, Config > NeighbourGridEntityType;
+      typedef GridEntity< GridType, NeighborEntityDimension, Config > NeighborGridEntityType;
       typedef Real RealType;
       typedef Index IndexType;
       typedef typename GridType::CoordinatesType CoordinatesType;
-      typedef GridEntityGetter< GridType, NeighbourGridEntityType > GridEntityGetter;
+      typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetter;
       typedef GridEntityStencilStorageTag< GridEntityCrossStencil > StencilStorage;
-      typedef tnlTestNeighbourGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType;
+      typedef tnlTestNeighborGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType;
       static const int stencilSize = Config::getStencilSize();
       __cuda_callable__ inline
-      tnlTestNeighbourGridEntityGetter( const GridEntityType& entity )
+      tnlTestNeighborGridEntityGetter( const GridEntityType& entity )
       : entity( entity )
@@ -110,9 +110,9 @@ class tnlTestNeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
+               neighborEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index;
@@ -122,10 +122,10 @@ class tnlTestNeighbourGridEntityGetter<
-            static void exec( ThisType& neighbourEntityGetter, const IndexType& entityIndex )
+            static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex )
-               neighbourEntityGetter.stencilY[ index + stencilSize ] = 
-                  entityIndex + index * neighbourEntityGetter.entity.getMesh().getDimensions().x();
+               neighborEntityGetter.stencilY[ index + stencilSize ] = 
+                  entityIndex + index * neighborEntityGetter.entity.getMesh().getDimensions().x();
@@ -134,9 +134,9 @@ class tnlTestNeighbourGridEntityGetter<
       void refresh( const GridType& grid, const IndexType& entityIndex )
 #ifndef HAVE_CUDA // TODO: fix this to work with CUDA
-         tnlStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex );
-         tnlStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex );
-         tnlStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex );
+         StaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex );
+         StaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex );
+         StaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex );
@@ -147,6 +147,6 @@ class tnlTestNeighbourGridEntityGetter<
       IndexType stencilX[ 2 * stencilSize + 1 ];
       IndexType stencilY[ 2 * stencilSize + 1 ];
-      //tnlTestNeighbourGridEntityGetter(){};      
+      //tnlTestNeighborGridEntityGetter(){};      
diff --git a/tests/benchmarks/share/CMakeLists.txt b/tests/benchmarks/share/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/benchmarks/spmv.h b/tests/benchmarks/spmv.h
index 6fbd68dfb2bae574e47a05ea1f7e49f2b90532d4..c04a62eabe58a1474d5b9c8a2c361911d8762693 100644
--- a/tests/benchmarks/spmv.h
+++ b/tests/benchmarks/spmv.h
@@ -84,7 +84,7 @@ void setCudaTestMatrix( Matrix& matrix,
         setCudaTestMatrixKernel< Matrix >
             <<< cudaGridSize, cudaBlockSize >>>
             ( kernel_matrix, elementsPerRow, gridIdx );
-        checkCudaDevice;
     Devices::Cuda::freeFromDevice( kernel_matrix );
@@ -118,43 +118,25 @@ benchmarkSpMV( Benchmark & benchmark,
     parseObjectType( HostMatrix::getType(), parsedType );
     benchmark.createHorizontalGroup( parsedType[ 0 ], 2 );
-    if( ! hostRowLengths.setSize( size ) ||
-        ! hostMatrix.setDimensions( size, size ) ||
-        ! hostVector.setSize( size ) ||
-        ! hostVector2.setSize( size )
+    hostRowLengths.setSize( size );
+    hostMatrix.setDimensions( size, size );
+    hostVector.setSize( size );
+    hostVector2.setSize( size );
 #ifdef HAVE_CUDA
-        ||
-        ! deviceRowLengths.setSize( size ) ||
-        ! deviceMatrix.setDimensions( size, size ) ||
-        ! deviceVector.setSize( size ) ||
-        ! deviceVector2.setSize( size )
+    deviceRowLengths.setSize( size );
+    deviceMatrix.setDimensions( size, size );
+    deviceVector.setSize( size );
+    deviceVector2.setSize( size );
-        )
-    {
-        const char* msg = "error: allocation of vectors failed";
-        std::cerr << msg << std::endl;
-        benchmark.addErrorMessage( msg, 2 );
-        return false;
-    }
     hostRowLengths.setValue( elementsPerRow );
 #ifdef HAVE_CUDA
     deviceRowLengths.setValue( elementsPerRow );
-    if( ! hostMatrix.setCompressedRowLengths( hostRowLengths ) ) {
-        const char* msg = "error: allocation of host matrix failed";
-        std::cerr << msg << std::endl;
-        benchmark.addErrorMessage( msg, 2 );
-        return false;
-    }
+    hostMatrix.setCompressedRowLengths( hostRowLengths );
 #ifdef HAVE_CUDA
-    if( ! deviceMatrix.setCompressedRowLengths( deviceRowLengths ) ) {
-        const char* msg = "error: allocation of device matrix failed";
-        std::cerr << msg << std::endl;
-        benchmark.addErrorMessage( msg, 2 );
-        return false;
-    }
+    deviceMatrix.setCompressedRowLengths( deviceRowLengths );
     const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow );
diff --git a/tests/benchmarks/tnl-benchmark-spmv.h b/tests/benchmarks/tnl-benchmark-spmv.h
index 3fcb997c3f4d0711ca43d847b61aa8be89f7c1de..a7ebf68dc40e368929f7a5c284e65ead69845783 100644
--- a/tests/benchmarks/tnl-benchmark-spmv.h
+++ b/tests/benchmarks/tnl-benchmark-spmv.h
@@ -326,7 +326,6 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
          return false;
       const int rows = csrMatrix.getRows();
-      const int columns = csrMatrix.getColumns();
       const long int nonzeroElements = csrMatrix.getNumberOfMatrixElements();
       Containers::Vector< int, Devices::Host, int > rowLengthsHost;
       rowLengthsHost.setSize( rows );
@@ -363,160 +362,153 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
       typedef CSR< Real, Devices::Cuda, int > CSRCudaType;
       CSRCudaType cudaCSR;
       //cout << "Copying matrix to GPU... ";
-      if( ! cudaCSR.copyFrom( csrMatrix, rowLengthsCuda ) )
-      {
-         std::cerr << "I am not able to transfer the matrix on GPU." << std::endl;
-         writeTestFailed( logFile, 21 );
-      }
-      else
-      {
-         ::tnlCusparseCSR< Real > cusparseCSR;
-         cusparseCSR.init( cudaCSR, &cusparseHandle );
-         benchmarkMatrix( cusparseCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "Cusparse CSR",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cusparseDestroy( cusparseHandle );
+      cudaCSR = csrMatrix;
+      ::tnlCusparseCSR< Real > cusparseCSR;
+      cusparseCSR.init( cudaCSR, &cusparseHandle );
+      benchmarkMatrix( cusparseCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "Cusparse CSR",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cusparseDestroy( cusparseHandle );
-        std::cout << " done.   \r";
-         /*cudaCSR.setCudaKernelType( CSRCudaType::scalar );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Scalar",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaKernelType( CSRCudaType::vector );
-         cudaCSR.setCudaWarpSize( 1 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Vector 1",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaWarpSize( 2 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Vector 2",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaWarpSize( 4 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Vector 4",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaWarpSize( 8 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Vector 8",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaWarpSize( 16 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Vector 16",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaWarpSize( 32 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Vector 32",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setCudaKernelType( CSRCudaType::hybrid );
-         cudaCSR.setHybridModeSplit( 2 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Hyrbid 2",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setHybridModeSplit( 4 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Hyrbid 4",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setHybridModeSplit( 8 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Hyrbid 8",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setHybridModeSplit( 16 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Hyrbid 16",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setHybridModeSplit( 32 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Hyrbid 32",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
-         cudaCSR.setHybridModeSplit( 64 );
-         benchmarkMatrix( cudaCSR,
-                          cudaX,
-                          cudaB,
-                          nonzeroElements,
-                          "CSR Cuda Hyrbid 64",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );*/
-      }
+      std::cout << " done.   \r";
+      /*cudaCSR.setCudaKernelType( CSRCudaType::scalar );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Scalar",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaKernelType( CSRCudaType::vector );
+      cudaCSR.setCudaWarpSize( 1 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Vector 1",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaWarpSize( 2 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Vector 2",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaWarpSize( 4 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Vector 4",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaWarpSize( 8 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Vector 8",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaWarpSize( 16 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Vector 16",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaWarpSize( 32 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Vector 32",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setCudaKernelType( CSRCudaType::hybrid );
+      cudaCSR.setHybridModeSplit( 2 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Hyrbid 2",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setHybridModeSplit( 4 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Hyrbid 4",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setHybridModeSplit( 8 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Hyrbid 8",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setHybridModeSplit( 16 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Hyrbid 16",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setHybridModeSplit( 32 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Hyrbid 32",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaCSR.setHybridModeSplit( 64 );
+      benchmarkMatrix( cudaCSR,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "CSR Cuda Hyrbid 64",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );*/
@@ -524,138 +516,105 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
       double padding;
       typedef Ellpack< Real, Devices::Host, int > EllpackType;
       EllpackType ellpackMatrix;
-      if( ! ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) )
-         writeTestFailed( logFile, 7 );
-      else
-      {
-         allocatedElements = ellpackMatrix.getNumberOfMatrixElements();
-         padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0;
-         logFile << "    " << padding << std::endl;
-         benchmarkMatrix( ellpackMatrix,
-                          hostX,
-                          hostB,
-                          nonzeroElements,
-                          "Ellpack Host",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
+      Matrices::copySparseMatrix( ellpackMatrix, csrMatrix );
+      allocatedElements = ellpackMatrix.getNumberOfMatrixElements();
+      padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0;
+      logFile << "    " << padding << std::endl;
+      benchmarkMatrix( ellpackMatrix,
+                       hostX,
+                       hostB,
+                       nonzeroElements,
+                       "Ellpack Host",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
 #ifdef HAVE_CUDA
-         typedef Ellpack< Real, Devices::Cuda, int > EllpackCudaType;
-         EllpackCudaType cudaEllpack;
-        std::cout << "Copying matrix to GPU... ";
-         if( ! cudaEllpack.copyFrom( ellpackMatrix, rowLengthsCuda ) )
-         {
-            std::cerr << "I am not able to transfer the matrix on GPU." << std::endl;
-            writeTestFailed( logFile, 3 );
-         }
-         else
-         {
-           std::cout << " done.   \r";
-            benchmarkMatrix( cudaEllpack,
-                             cudaX,
-                             cudaB,
-                             nonzeroElements,
-                             "Ellpack Cuda",
-                             stopTime,
-                             baseline,
-                             verbose,
-                             logFile );
-         }
-         cudaEllpack.reset();
+      typedef Ellpack< Real, Devices::Cuda, int > EllpackCudaType;
+      EllpackCudaType cudaEllpack;
+      std::cout << "Copying matrix to GPU... ";
+      cudaEllpack = ellpackMatrix;
+      std::cout << " done.   \r";
+      benchmarkMatrix( cudaEllpack,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "Ellpack Cuda",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaEllpack.reset();
-         ellpackMatrix.reset();
-      }
+      ellpackMatrix.reset();
       typedef SlicedEllpack< Real, Devices::Host, int > SlicedEllpackType;
       SlicedEllpackType slicedEllpack;
-      if( ! slicedEllpack.copyFrom( csrMatrix, rowLengthsHost ) )
-         writeTestFailed( logFile, 7 );
-      else
-      {
-         allocatedElements = slicedEllpack.getNumberOfMatrixElements();
-         padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0;
-         logFile << "    " << padding << std::endl;
-         benchmarkMatrix( slicedEllpack,
-                          hostX,
-                          hostB,
-                          nonzeroElements,
-                          "SlicedEllpack Host",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
+      Matrices::copySparseMatrix( slicedEllpack, csrMatrix );
+      allocatedElements = slicedEllpack.getNumberOfMatrixElements();
+      padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0;
+      logFile << "    " << padding << std::endl;
+      benchmarkMatrix( slicedEllpack,
+                       hostX,
+                       hostB,
+                       nonzeroElements,
+                       "SlicedEllpack Host",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
 #ifdef HAVE_CUDA
-         typedef SlicedEllpack< Real, Devices::Cuda, int > SlicedEllpackCudaType;
-         SlicedEllpackCudaType cudaSlicedEllpack;
-        std::cout << "Copying matrix to GPU... ";
-         if( ! cudaSlicedEllpack.copyFrom( slicedEllpack, rowLengthsCuda ) )
-         {
-            std::cerr << "I am not able to transfer the matrix on GPU." << std::endl;
-            writeTestFailed( logFile, 3 );
-         }
-         else
-         {
-           std::cout << " done.   \r";
-            benchmarkMatrix( cudaSlicedEllpack,
-                             cudaX,
-                             cudaB,
-                             nonzeroElements,
-                             "SlicedEllpack Cuda",
-                             stopTime,
-                             baseline,
-                             verbose,
-                             logFile );
-         }
-         cudaSlicedEllpack.reset();
+      typedef SlicedEllpack< Real, Devices::Cuda, int > SlicedEllpackCudaType;
+      SlicedEllpackCudaType cudaSlicedEllpack;
+      std::cout << "Copying matrix to GPU... ";
+      cudaSlicedEllpack = slicedEllpack;
+      std::cout << " done.   \r";
+      benchmarkMatrix( cudaSlicedEllpack,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "SlicedEllpack Cuda",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaSlicedEllpack.reset();
-         slicedEllpack.reset();
-      }
+      slicedEllpack.reset();
       typedef ChunkedEllpack< Real, Devices::Host, int > ChunkedEllpackType;
       ChunkedEllpackType chunkedEllpack;
-      if( ! chunkedEllpack.copyFrom( csrMatrix, rowLengthsHost ) )
-         writeTestFailed( logFile, 7 );
-      else
-      {
-         allocatedElements = chunkedEllpack.getNumberOfMatrixElements();
-         padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0;
-         logFile << "    " << padding << std::endl;
-         benchmarkMatrix( chunkedEllpack,
-                          hostX,
-                          hostB,
-                          nonzeroElements,
-                          "ChunkedEllpack Host",
-                          stopTime,
-                          baseline,
-                          verbose,
-                          logFile );
+      Matrices::copySparseMatrix( chunkedEllpack, csrMatrix );
+      allocatedElements = chunkedEllpack.getNumberOfMatrixElements();
+      padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0;
+      logFile << "    " << padding << std::endl;
+      benchmarkMatrix( chunkedEllpack,
+                       hostX,
+                       hostB,
+                       nonzeroElements,
+                       "ChunkedEllpack Host",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
 #ifdef HAVE_CUDA
-         typedef ChunkedEllpack< Real, Devices::Cuda, int > ChunkedEllpackCudaType;
-         ChunkedEllpackCudaType cudaChunkedEllpack;
-        std::cout << "Copying matrix to GPU... ";
-         if( ! cudaChunkedEllpack.copyFrom( chunkedEllpack, rowLengthsCuda ) )
-         {
-            std::cerr << "I am not able to transfer the matrix on GPU." << std::endl;
-            writeTestFailed( logFile, 3 );
-         }
-         else
-         {
-           std::cout << " done.    \r";
-            benchmarkMatrix( cudaChunkedEllpack,
-                             cudaX,
-                             cudaB,
-                             nonzeroElements,
-                             "ChunkedEllpack Cuda",
-                             stopTime,
-                             baseline,
-                             verbose,
-                             logFile );
-         }
-         cudaChunkedEllpack.reset();
+      typedef ChunkedEllpack< Real, Devices::Cuda, int > ChunkedEllpackCudaType;
+      ChunkedEllpackCudaType cudaChunkedEllpack;
+      std::cout << "Copying matrix to GPU... ";
+      cudaChunkedEllpack = chunkedEllpack;
+      std::cout << " done.    \r";
+      benchmarkMatrix( cudaChunkedEllpack,
+                       cudaX,
+                       cudaB,
+                       nonzeroElements,
+                       "ChunkedEllpack Cuda",
+                       stopTime,
+                       baseline,
+                       verbose,
+                       logFile );
+      cudaChunkedEllpack.reset();
-         chunkedEllpack.reset();
-      }
+      chunkedEllpack.reset();
    return true;
diff --git a/tests/benchmarks/vector-operations.h b/tests/benchmarks/vector-operations.h
index 1c663ac20de0ecb1bfe20300787a43303b1f65bc..093243c07c1912919c0cd5baae1c0abb9f4a22b4 100644
--- a/tests/benchmarks/vector-operations.h
+++ b/tests/benchmarks/vector-operations.h
@@ -42,20 +42,12 @@ benchmarkVectorOperations( Benchmark & benchmark,
     HostVector hostVector, hostVector2;
     CudaVector deviceVector, deviceVector2;
-    if( ! hostVector.setSize( size ) ||
-        ! hostVector2.setSize( size )
+    hostVector.setSize( size );
+    hostVector2.setSize( size );
 #ifdef HAVE_CUDA
-        ||
-        ! deviceVector.setSize( size ) ||
-        ! deviceVector2.setSize( size )
+    deviceVector.setSize( size );
+    deviceVector2.setSize( size );
-        )
-    {
-        const char* msg = "error: allocation of vectors failed";
-        std::cerr << msg << std::endl;
-        benchmark.addErrorMessage( msg );
-        return false;
-    }
     Real resultHost, resultDevice;
diff --git a/tests/long-time-unit-tests/CMakeLists.txt b/tests/long-time-unit-tests/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/mic/CMakeLists.txt b/tests/mic/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7e6d9e53437394174a3af73c6c63067a1633b229
--- /dev/null
+++ b/tests/mic/CMakeLists.txt
@@ -0,0 +1,9 @@
+    ADD_EXECUTABLE( tnlMICArrayTest${mpiExt}${debugExt} ${headers} tnlMICArrayTest.cpp )   
+    TARGET_LINK_LIBRARIES( tnlMICArrayTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
+                                                           tnl${mpiExt}${debugExt}-0.1 )
+    ADD_EXECUTABLE( tnlMICVectorTest${mpiExt}${debugExt} ${headers} tnlMICVectorTest.cpp )   
+    TARGET_LINK_LIBRARIES( tnlMICVectorTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
+                                                           tnl${mpiExt}${debugExt}-0.1 )
\ No newline at end of file
diff --git a/tests/mic/tnlMICArrayTest.cpp b/tests/mic/tnlMICArrayTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7c932fad62a86d9ffc0b42abdb5494a954940a5c
--- /dev/null
+++ b/tests/mic/tnlMICArrayTest.cpp
@@ -0,0 +1,205 @@
+                          tnlMICArrayTest.cpp  -  
+                application testing Array implemntation on MIC KNC
+                              by hanouvit 
+                             -------------------
+    copyright            : (C) 2004 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include <iostream>
+#include <TNL/Devices/MIC.h>
+#include <omp.h>
+#include <TNL/Containers/Array.h>
+	using namespace std;
+	using namespace TNL;
+	using namespace TNL::Containers;
+unsigned int errors=0;
+unsigned int success=0;
+#define TEST_TEST(a) if((a)){cout << __LINE__ <<":\t OK " <<endl;success++;}else{cout << __LINE__<<":\t FAIL" <<endl;errors++;}
+#define TEST_RESULT cout<<"SUCCES: "<<success<<endl<<"ERRRORS: "<<errors<<endl;
+inline void Test_Say( const char * message)
+	cout << message <<endl;
+using namespace std;
+int main(void)
+    cout << "Array on MIC test by hanouvit:" <<endl;
+	#ifdef HAVE_ICPC
+		cout << "ICPC in USE" <<endl;
+	#endif
+	#ifdef HAVE_MIC
+		cout << "MIC in USE" <<endl; //LOL
+	#endif
+#ifdef HAVE_MIC
+//prepare arrays with data
+	Array<double,Devices::MIC,int> aa(10);
+	Array<double,Devices::MIC,int> ee(6);
+	Array<double,Devices::Host,int> cc(5);
+//fill it 
+Devices::MICHider<double> data_ptr;
+data_ptr.pointer= aa.getData();	
+int size=aa.getSize();
+#pragma offload target(mic) in(data_ptr,size)
+    for(int i=0;i<size;i++)
+    {
+            data_ptr.pointer[i]=i;
+    }
+for(int i=0;i<5;i++)
+	cc[i]=10+i;
+//prepare arrays for funky tests
+Array<double,Devices::MIC,int> bb(10);
+Array<double,Devices::MIC,int> dd(0);
+Test_Say("Is aa filled correctly? (aa.getElement):");
+for(int i=0;i<10;i++)
+	TEST_TEST(aa.getElement(i)==i);
+Test_Say("Copy to bb(MIC->MIC) (=):");
+for(int i=0;i<bb.getSize();i++)
+	TEST_TEST(bb.getElement(i)==i);
+Test_Say("Copy (Host -> MIC) (=)");
+for(int i=0;i<bb.getSize();i++)
+	TEST_TEST(bb.getElement(i)==i+10);
+for(int i=0;i<bb.getSize();i++)
+	TEST_TEST(bb.getElement(i)==5);
+for(int i=0;i<aa.getSize();i++)
+	TEST_TEST(aa.getElement(i)==5);
+for(int i=0;i<bb.getSize();i++)
+	TEST_TEST(bb.getElement(i)==i);
+Test_Say("(MIC -> MIC) ==");
+Test_Say("(Host -> MIC) !=");
+Test_Say("bidn (light test)");
+//Mylsím, že není zdaleka testováno vše...
+Test_Say("File Array Test: \n");
+//prepare arrays with data
+//fill it UP
+/*Devices::MICHider<double> data_ptr;*/
+data_ptr.pointer= aa.getData();	
+#pragma offload target(mic) in(data_ptr,size)
+    for(int i=0;i<size;i++)
+    {
+            data_ptr.pointer[i]=i;
+    }
+for(int i=0;i<5;i++)
+	cc[i]=10+i;
+File soubor;
+TEST_TEST( 10 == aa.getSize())
+for(int i=0;i<5;i++)
+	TEST_TEST(aa.getElement(i)==i)
+for(int i=5;i<10;i++)
+	TEST_TEST(aa.getElement(i)==i+5)
+TEST_TEST( 10 == cc.getSize())
+for(int i=0;i<cc.getSize();i++)
+	TEST_TEST(cc.getElement(i)==i)
+    return 0;
diff --git a/tests/mic/tnlMICVectorTest.cpp b/tests/mic/tnlMICVectorTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..058715b208c229e58b1d50a98e0c747304f679cd
--- /dev/null
+++ b/tests/mic/tnlMICVectorTest.cpp
@@ -0,0 +1,150 @@
+                          tnlMICVectorTest.cpp  -  
+                application testing Vector implemntation on MIC KNC
+                              by hanouvit 
+                             -------------------
+    copyright            : (C) 2004 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+/* See Copyright Notice in tnl/Copyright */
+#include <iostream>
+#include <omp.h>
+#include <stdint.h>
+#include <TNL/Devices/MIC.h>
+#include <TNL/Containers/Vector.h>
+using namespace std;
+using namespace TNL;
+using namespace TNL::Containers;
+unsigned int errors=0;
+unsigned int success=0;
+#define TEST_TEST(a) if((a)){cout << __LINE__ <<":\t OK" <<endl;success++;}else{cout << __LINE__<<":\t FAIL" <<endl;errors++;}
+#define TEST_RESULT cout<<"SUCCES: "<<success<<endl<<"ERRRORS: "<<errors<<endl;
+inline void Test_Say( const char * message)
+	cout << message <<endl;
+int main(void)
+    cout << "Vector on MIC test by hanouvit:" <<endl;
+    #ifdef HAVE_ICPC
+            cout << "ICPC in USE" <<endl; 
+    #endif
+    #ifdef HAVE_MIC
+            cout << "MIC in USE" <<endl;
+    #endif
+#ifdef HAVE_MIC
+        Vector<double,Devices::MIC,int> aa(10);
+        Vector<double,Devices::MIC,int> bb(10);
+        Vector<double,Devices::MIC,int> cc(10);
+        Vector<double,Devices::Host,int> aaa(10);
+        Vector<double,Devices::Host,int> bbb(10);
+        Vector<double,Devices::Host,int> ccc(10);
+        for(int i=0;i<10;i++)
+        {
+            aa.setElement(i,i-5);
+            aaa.setElement(i,i-5);
+            bb.setElement(i,5-i);
+            bbb.setElement(i,5-i);
+            cc.setElement(i,10+i);
+            ccc.setElement(i,10+i);
+        }
+        Test_Say("Is filled correctly?:");
+        for(int i=0;i<10;i++)
+        {
+            TEST_TEST(aa.getElement(i)==aaa.getElement(i));
+            TEST_TEST(bb.getElement(i)==bbb.getElement(i));
+            TEST_TEST(cc.getElement(i)==ccc.getElement(i));
+        }
+        Test_Say("min():");
+           TEST_TEST(bb.min()==bbb.min());        
+        Test_Say("absMin():");
+           TEST_TEST(bb.absMin()==bbb.absMin());
+        Test_Say("max():");
+           TEST_TEST(bb.max()==bbb.max());
+        Test_Say("absMax():");
+           TEST_TEST(bb.absMax()==bbb.absMax());
+        Test_Say("lpNorm( N ):");
+           TEST_TEST(aa.lpNorm(1)==aaa.lpNorm(1));
+           TEST_TEST(aa.lpNorm(2)==aaa.lpNorm(2));
+           TEST_TEST(aa.lpNorm(0.5)==aaa.lpNorm(0.5));
+           TEST_TEST(aa.lpNorm(3)==aaa.lpNorm(3));
+        Test_Say("sum():");
+           TEST_TEST(aa.sum()==aaa.sum());
+        Test_Say("differenceMax():");
+           TEST_TEST(aa.differenceMax(bb)==aaa.differenceMax(bbb));
+        Test_Say("differenceMin():");
+           TEST_TEST(aa.differenceMin(bb)==aaa.differenceMin(bbb));
+        Test_Say("differenceAbsMax():");
+           TEST_TEST(aa.differenceAbsMax(bb)==aaa.differenceAbsMax(bbb));
+        Test_Say("differenceAbsMin():");
+           TEST_TEST(aa.differenceAbsMin(bb)==aaa.differenceAbsMin(bbb));
+        Test_Say("differenceSum():");
+           TEST_TEST(aa.differenceSum(bb)==aaa.differenceSum(bbb));
+        ////
+        Test_Say("differenceLpNorm( N ):");
+           TEST_TEST(aa.differenceLpNorm(bb,1)==aaa.differenceLpNorm(bbb,1));
+           TEST_TEST(aa.differenceLpNorm(bb,2)==aaa.differenceLpNorm(bbb,2));
+           TEST_TEST(aa.differenceLpNorm(bb,0.5)==aaa.differenceLpNorm(bbb,0.5));
+           TEST_TEST(aa.differenceLpNorm(bb,3)==aaa.differenceLpNorm(bbb,3));
+        ////
+        Test_Say("== :");
+            TEST_TEST(aa==aaa);
+        Test_Say("vct*0.5 :");
+        aa*=0.5;
+        aaa*=0.5;
+            TEST_TEST(aa==aaa);
+        Test_Say("scalarProduct :");
+            TEST_TEST(aa.scalarProduct(bb) == aaa.scalarProduct(bbb));
+        Test_Say("addVector :");
+        aa.addVector(bb,2.0,3.0);
+        aaa.addVector(bbb,2.0,3.0);
+            TEST_TEST(aa==aaa);            
+        aa.addVectors(bb,2.0,cc,1.0,-3.0);
+        aaa.addVectors(bbb,2.0,ccc,1.0,-3.0);
+            TEST_TEST(aa==aaa); 
+        Test_Say("computeExclusivePrefixSum :");    
+        aa.computeExclusivePrefixSum();
+        aaa.computeExclusivePrefixSum();
+            TEST_TEST(aa==aaa);             
+        bb.computeExclusivePrefixSum(2,4);
+        bbb.computeExclusivePrefixSum(2,4);
+            TEST_TEST(bb==bbb); 
+        Test_Say("computePrefixSum :");    
+        cc.computePrefixSum();
+        ccc.computePrefixSum();
+            TEST_TEST(cc==ccc); 
+        cc.computePrefixSum(2,4);
+        ccc.computePrefixSum(2,4);
+            TEST_TEST(cc==ccc); 	
+    return 0;
diff --git a/tests/mpi/Functions.h b/tests/mpi/Functions.h
index 21cfeb9276740887960be16cb686be660e7374ae..c74be9e63a70e359eda3fbe5110ed7b9b8c5c52e 100644
--- a/tests/mpi/Functions.h
+++ b/tests/mpi/Functions.h
@@ -80,8 +80,6 @@ class FunctionToEvaluate<Real,2> : public Functions::Domain< 2, Functions::MeshD
 		 //return meshEntity.getCoordinates().y()*10+meshEntity.getCoordinates().x();
 		 return meshEntity.getCenter().y()*100+meshEntity.getCenter().x();
diff --git a/tests/mpi/MeshFunctionEvauateTest.cpp b/tests/mpi/MeshFunctionEvauateTest.cpp
index fc83f44dc4d55af3812c6917821d55082e59d971..7ab5a7f6ad044f183e308f8cc90fe13f5b35e46d 100644
--- a/tests/mpi/MeshFunctionEvauateTest.cpp
+++ b/tests/mpi/MeshFunctionEvauateTest.cpp
@@ -18,7 +18,7 @@ using namespace std;
 #include <TNL/Timer.h>
-#define OUTPUT 
+//#define OUTPUT 
 #include "Functions.h"
diff --git a/tests/mpi/a.out b/tests/mpi/a.out
deleted file mode 100755
index 0228d30c7f9574335ed795dc630c0485e3d15ea2..0000000000000000000000000000000000000000
Binary files a/tests/mpi/a.out and /dev/null differ
diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/unit-tests/core/cuda/CMakeLists.txt b/tests/unit-tests/core/cuda/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/unit-tests/core/cuda/tnlCudaReductionTester.h b/tests/unit-tests/core/cuda/tnlCudaReductionTester.h
index 193b95a6808b7995d49324ecc179e1ae000287de..313063b22d690c50aa375bc01a7a489500448a99 100644
--- a/tests/unit-tests/core/cuda/tnlCudaReductionTester.h
+++ b/tests/unit-tests/core/cuda/tnlCudaReductionTester.h
@@ -95,7 +95,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       for( int i = 0; i < size; i ++ )
          hostData[ i ] = value;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< RealType, RealType, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
    template< typename RealType >
@@ -105,7 +105,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       RealType *hostData, *deviceData;
       ArrayOperations< Devices::Host >::allocateMemory( hostData, shortSequence );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, shortSequence );
-      CPPUNIT_ASSERT( checkCudaDevice );
       RealType result;
@@ -149,7 +149,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::freeMemory( hostData );
       ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
-      CPPUNIT_ASSERT( checkCudaDevice );
    template< typename RealType >
@@ -159,7 +159,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       RealType *hostData, *deviceData;
       ArrayOperations< Devices::Host >::allocateMemory( hostData, longSequence );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, longSequence );
-      CPPUNIT_ASSERT( checkCudaDevice );
       RealType result;
@@ -237,7 +237,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::freeMemory( hostData );
       ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
-      CPPUNIT_ASSERT( checkCudaDevice );
    template< typename RealType >
@@ -247,7 +247,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       RealType *hostData, *deviceData;
       ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       RealType sum( 0.0 );
       for( int i = 0; i < size; i ++ )
@@ -256,7 +256,7 @@ class CudaReductionTester : public CppUnit :: TestCase
          sum += hostData[ i ];
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< RealType, RealType, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       tnlParallelReductionSum< RealType, int > sumOperation;
       RealType result;
@@ -289,7 +289,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::freeMemory( hostData );
       ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
-      CPPUNIT_ASSERT( checkCudaDevice );
    template< typename Type >
@@ -299,13 +299,13 @@ class CudaReductionTester : public CppUnit :: TestCase
       Type *hostData, *deviceData;
       ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
          hostData[ i ] = 1;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       tnlParallelReductionLogicalAnd< Type, int > andOperation;
       tnlParallelReductionLogicalOr< Type, int > orOperation;
@@ -319,7 +319,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       hostData[ 0 ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( andOperation, size, deviceData, ( Type* ) 0, result ) ) );
       CPPUNIT_ASSERT( result == 0 );
@@ -331,7 +331,7 @@ class CudaReductionTester : public CppUnit :: TestCase
          hostData[ i ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( andOperation, size, deviceData, ( Type* ) 0, result ) ) );
       CPPUNIT_ASSERT( result == 0 );
@@ -347,13 +347,13 @@ class CudaReductionTester : public CppUnit :: TestCase
       Type *hostData, *deviceData;
       ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
          hostData[ i ] = 1;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       tnlParallelReductionLogicalAnd< Type, int > andOperation;
       tnlParallelReductionLogicalOr< Type, int > orOperation;
@@ -367,7 +367,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       hostData[ 0 ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( andOperation, size, deviceData, ( Type* ) 0, result ) ) );
       CPPUNIT_ASSERT( result == 0 );
@@ -379,7 +379,7 @@ class CudaReductionTester : public CppUnit :: TestCase
          hostData[ i ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData, hostData, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( andOperation, size, deviceData, ( Type* ) 0, result ) ) );
       CPPUNIT_ASSERT( result == 0 );
@@ -398,13 +398,13 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData1, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
          hostData1[ i ] = hostData2[ i ] = 1;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData2, hostData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       bool result( false );
       tnlParallelReductionEqualities< Type, int > equalityOperation;
@@ -420,7 +420,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       hostData1[ 0 ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( equalityOperation, size, deviceData1, deviceData2, result ) ) );
@@ -433,7 +433,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       for( int i = 0; i < size; i ++ )
          hostData1[ i ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( equalityOperation, size, deviceData1, deviceData2, result ) ) );
@@ -454,13 +454,13 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData1, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
          hostData1[ i ] = hostData2[ i ] = 1;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData2, hostData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       bool result( false );
       tnlParallelReductionEqualities< Type, int > equalityOperation;
@@ -476,7 +476,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       hostData1[ 0 ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( equalityOperation, size, deviceData1, deviceData2, result ) ) );
@@ -489,7 +489,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       for( int i = 0; i < size; i ++ )
          hostData1[ i ] = 0;
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
           ( reductionOnCudaDevice( equalityOperation, size, deviceData1, deviceData2, result ) ) );
@@ -510,7 +510,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData1, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       hostData1[ 0 ] = 0;
       hostData2[ 0 ] = 1;
@@ -523,7 +523,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData2, hostData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       Type result( 0.0 );
       tnlParallelReductionScalarProduct< Type, int > scalarProductOperation;
@@ -544,7 +544,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Host >::allocateMemory( hostData2, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData1, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       hostData1[ 0 ] = 0;
       hostData2[ 0 ] = 1;
@@ -557,7 +557,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData1, hostData1, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceData2, hostData2, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       Type result( 0.0 );
       tnlParallelReductionScalarProduct< Type, int > scalarProductOperation;
@@ -579,7 +579,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceZeros, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceOnes, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceLinear, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
@@ -591,7 +591,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceZeros, hostZeros, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceOnes, hostOnes, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceLinear, hostLinear, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       tnlParallelReductionDiffSum< Type, int > diffSumOp;
       tnlParallelReductionDiffMin< Type, int > diffMinOp;
@@ -696,7 +696,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceZeros, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceOnes, size );
       ArrayOperations< Devices::Cuda >::allocateMemory( deviceLinear, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
@@ -708,7 +708,7 @@ class CudaReductionTester : public CppUnit :: TestCase
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceZeros, hostZeros, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceOnes, hostOnes, size );
       ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< Type, Type, int >( deviceLinear, hostLinear, size );
-      CPPUNIT_ASSERT( checkCudaDevice );
       tnlParallelReductionDiffSum< Type, int > diffSumOp;
       tnlParallelReductionDiffMin< Type, int > diffMinOp;
diff --git a/tests/unit-tests/core/cuda/tnlCudaTester.h b/tests/unit-tests/core/cuda/tnlCudaTester.h
index 5c2024cbcc5dcdb70c4ad90b15d70d400bf36649..3cb033c53b7812510f22d3d1b976b9eeee6bd267 100644
--- a/tests/unit-tests/core/cuda/tnlCudaTester.h
+++ b/tests/unit-tests/core/cuda/tnlCudaTester.h
@@ -59,7 +59,7 @@ class Devices::CudaTester : public CppUnit :: TestCase
       blockSize. x = 1;
       gridSize. x = 1;
       simpleKernel<<< gridSize, blockSize >>>();
-      if( ! checkCudaDevice )
+      if( ! TNL_CHECK_CUDA_DEVICE )
          std::cerr << "Test with simple kernel failed. It seems that the CUDA device does not work properly." << std::endl;
          CPPUNIT_ASSERT( false );
diff --git a/tests/unit-tests/core/multimaps/tnlIndexMultimapTester.h b/tests/unit-tests/core/multimaps/tnlIndexMultimapTester.h
index 2e077e9c3d9057e90819073cf48f56743e113096..236a608e5601791fec44f134a00436629e4352d5 100644
--- a/tests/unit-tests/core/multimaps/tnlIndexMultimapTester.h
+++ b/tests/unit-tests/core/multimaps/tnlIndexMultimapTester.h
@@ -208,7 +208,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          tnlIndexMultimapTester__setElementFastTestCudaKernel< IndexMultimapType >
                                                             <<< cudaGridSize, cudaBlockSize >>>
@@ -217,7 +217,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -271,7 +271,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          tnlIndexMultimapTester__setElementFast_DiagonalIndexMultimapTestCudaKernel< IndexMultimapType >
                                                                            <<< cudaGridSize, cudaBlockSize >>>
@@ -280,7 +280,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -355,7 +355,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          tnlIndexMultimapTester__setElementFast_DenseIndexMultimapTestCudaKernel1< IndexMultimapType >
                                                                          <<< cudaGridSize, cudaBlockSize >>>
@@ -364,7 +364,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -390,7 +390,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          tnlIndexMultimapTester__setElementFast_DenseIndexMultimapTestCudaKernel2< IndexMultimapType >
                                                                          <<< cudaGridSize, cudaBlockSize >>>
@@ -399,7 +399,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -469,7 +469,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          tnlIndexMultimapTester__setElementFast_LowerTriangularIndexMultimapTestCudaKernel1< IndexMultimapType >
                                                                                    <<< cudaGridSize, cudaBlockSize >>>
@@ -478,7 +478,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -504,7 +504,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          tnlIndexMultimapTester__setElementFast_LowerTriangularIndexMultimapTestCudaKernel2< IndexMultimapType >
                                                                                    <<< cudaGridSize, cudaBlockSize >>>
@@ -513,7 +513,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -615,7 +615,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          tnlIndexMultimapTester__setRowFast_DiagonalIndexMultimapTestCudaKernel< IndexMultimapType >
@@ -625,7 +625,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -722,7 +722,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          tnlIndexMultimapTester__setRowFast_DenseIndexMultimapTestCudaKernel1< IndexMultimapType >
@@ -732,7 +732,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -762,7 +762,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          tnlIndexMultimapTester__setRowFast_DenseIndexMultimapTestCudaKernel2< IndexMultimapType >
@@ -772,7 +772,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -863,7 +863,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          tnlIndexMultimapTester__setRowFast_LowerTriangularIndexMultimapTestCudaKernel< IndexMultimapType >
@@ -873,7 +873,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -903,7 +903,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          IndexMultimapType* kernel_graph = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          tnlIndexMultimapTester__setRowFast_LowerTriangularIndexMultimapTestCudaKernel< IndexMultimapType >
@@ -913,7 +913,7 @@ class tnlIndexMultimapTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_graph );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
diff --git a/tests/unit-tests/matrices/CMakeLists.txt b/tests/unit-tests/matrices/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/unit-tests/matrices/tnlDenseMatrixTester.h b/tests/unit-tests/matrices/tnlDenseMatrixTester.h
index 7e7d0db2ec32600180c573b8ce569f85eb213363..b2c89e27fea7c0fa04371daab0d0f251f3d3cd2a 100644
--- a/tests/unit-tests/matrices/tnlDenseMatrixTester.h
+++ b/tests/unit-tests/matrices/tnlDenseMatrixTester.h
@@ -100,11 +100,11 @@ class DenseTester : public CppUnit :: TestCase
 #ifdef HAVE_CUDA
          MatrixType* kernel_m = Devices::Cuda::passToDevice( m );
-         CPPUNIT_ASSERT( checkCudaDevice );
          setElementFastTestKernel<<< 1, 16 >>>( kernel_m );
-         CPPUNIT_ASSERT( checkCudaDevice );
          Devices::Cuda::freeFromDevice( kernel_m );
-         CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < 10; i++ )
@@ -145,11 +145,11 @@ class DenseTester : public CppUnit :: TestCase
 #ifdef HAVE_CUDA
          MatrixType* kernel_m = Devices::Cuda::passToDevice( m );
-         CPPUNIT_ASSERT( checkCudaDevice );
          addElementFastTestKernel<<< 1, 128 >>>( kernel_m );
-         CPPUNIT_ASSERT( checkCudaDevice );
          Devices::Cuda::freeFromDevice( kernel_m );
-         CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < 10; i++ )
@@ -216,11 +216,11 @@ class DenseTester : public CppUnit :: TestCase
 #ifdef HAVE_CUDA
          MatrixType* kernel_m = Devices::Cuda::passToDevice( m );
-         CPPUNIT_ASSERT( checkCudaDevice );
          setRowFastTestKernel<<< 1, 128 >>>( kernel_m, columns.getData(), values.getData(), ( IndexType ) 10 );
-         CPPUNIT_ASSERT( checkCudaDevice );
          Devices::Cuda::freeFromDevice( kernel_m );
-         CPPUNIT_ASSERT( checkCudaDevice );
diff --git a/tests/unit-tests/matrices/tnlSparseMatrixTester.h b/tests/unit-tests/matrices/tnlSparseMatrixTester.h
index 0bf1da58c83c8df00f55ff6066525cae28e2899d..b2b29c0e4c254a23756488fd57b1a65d151b6f50 100644
--- a/tests/unit-tests/matrices/tnlSparseMatrixTester.h
+++ b/tests/unit-tests/matrices/tnlSparseMatrixTester.h
@@ -206,7 +206,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          SparseTester__setElementFastTestCudaKernel< MatrixType >
                                                             <<< cudaGridSize, cudaBlockSize >>>
@@ -215,7 +215,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -269,7 +269,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          SparseTester__setElementFast_DiagonalMatrixTestCudaKernel< MatrixType >
                                                                            <<< cudaGridSize, cudaBlockSize >>>
@@ -278,7 +278,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -353,7 +353,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          SparseTester__setElementFast_DenseTestCudaKernel1< MatrixType >
                                                                          <<< cudaGridSize, cudaBlockSize >>>
@@ -362,7 +362,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -388,7 +388,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          SparseTester__setElementFast_DenseTestCudaKernel2< MatrixType >
                                                                          <<< cudaGridSize, cudaBlockSize >>>
@@ -397,7 +397,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -467,7 +467,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          SparseTester__setElementFast_LowerTriangularMatrixTestCudaKernel1< MatrixType >
                                                                                    <<< cudaGridSize, cudaBlockSize >>>
@@ -476,7 +476,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -502,7 +502,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          SparseTester__setElementFast_LowerTriangularMatrixTestCudaKernel2< MatrixType >
                                                                                    <<< cudaGridSize, cudaBlockSize >>>
@@ -511,7 +511,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -613,7 +613,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          SparseTester__setRowFast_DiagonalMatrixTestCudaKernel< MatrixType >
@@ -623,7 +623,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -720,7 +720,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          SparseTester__setRowFast_DenseTestCudaKernel1< MatrixType >
@@ -730,7 +730,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -760,7 +760,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          SparseTester__setRowFast_DenseTestCudaKernel2< MatrixType >
@@ -770,7 +770,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -861,7 +861,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          SparseTester__setRowFast_LowerTriangularMatrixTestCudaKernel< MatrixType >
@@ -871,7 +871,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
@@ -901,7 +901,7 @@ class SparseTester : public CppUnit :: TestCase
          MatrixType* kernel_matrix = Devices::Cuda::passToDevice( m );
          bool testResult( true );
          bool* kernel_testResult = Devices::Cuda::passToDevice( testResult );
-         checkCudaDevice;
          dim3 cudaBlockSize( 256 ), cudaGridSize( 1 );
          int sharedMemory = 100 * ( sizeof( IndexType ) + sizeof( RealType ) );
          SparseTester__setRowFast_LowerTriangularMatrixTestCudaKernel< MatrixType >
@@ -911,7 +911,7 @@ class SparseTester : public CppUnit :: TestCase
          CPPUNIT_ASSERT( Devices::Cuda::passFromDevice( kernel_testResult ) );
          Devices::Cuda::freeFromDevice( kernel_matrix );
          Devices::Cuda::freeFromDevice( kernel_testResult );
-         checkCudaDevice;
diff --git a/tests/unit-tests/mesh/CMakeLists.txt b/tests/unit-tests/mesh/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/unit-tests/mesh/tnlGrid2DTester.h b/tests/unit-tests/mesh/tnlGrid2DTester.h
index bb493c54d1cd51f9346bed34f5aa44d002fbec13..fcb9b47131f90684d0538d0d67a3842073ca1dd3 100644
--- a/tests/unit-tests/mesh/tnlGrid2DTester.h
+++ b/tests/unit-tests/mesh/tnlGrid2DTester.h
@@ -174,29 +174,29 @@ class GridTester< 2, RealType, Device, IndexType >: public CppUnit :: TestCase
                const CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( -1, 0 ) );
                const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-               auto neighbourEntities = cell.getNeighbourEntities();
-               CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< -1, 0 >() ) );
+               auto neighborEntities = cell.getNeighborEntities();
+               CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< -1, 0 >() ) );
             if( cell.getCoordinates().x() < xSize - 1 )
                const CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 1, 0 ) );
                const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-               auto neighbourEntities = cell.getNeighbourEntities();
-               CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 1, 0 >() ) );
+               auto neighborEntities = cell.getNeighborEntities();
+               CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 1, 0 >() ) );
             if( cell.getCoordinates().y() > 0 )
                const CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 0, -1 ) );
                const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-               auto neighbourEntities = cell.getNeighbourEntities();
-               CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 0, -1 >() ) );
+               auto neighborEntities = cell.getNeighborEntities();
+               CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 0, -1 >() ) );
             if( cell.getCoordinates().y() < ySize - 1 )
                const CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 0, 1 ) );
                const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-               auto neighbourEntities = cell.getNeighbourEntities();
-               CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 0, 1 >() ) );
+               auto neighborEntities = cell.getNeighborEntities();
+               CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 0, 1 >() ) );
@@ -224,35 +224,35 @@ class GridTester< 2, RealType, Device, IndexType >: public CppUnit :: TestCase
             //const IndexType cellIndex = grid.getEntityIndex( cell );
             cell.refresh(); //setIndex( cellIndex );
-            auto neighbourEntities = cell.template getNeighbourEntities< GridType::Face::entityDimension >();
+            auto neighborEntities = cell.template getNeighborEntities< GridType::Face::entityDimension >();
             FaceType face1( grid,
                             EntityOrientationType( -1, 0 ),
                             EntityBasisType( 0, 1 ) );
             IndexType face1Index = grid.template getEntityIndex( face1 );
-            CPPUNIT_ASSERT( ( face1Index == neighbourEntities.template getEntityIndex< -1, 0 >() ) );
+            CPPUNIT_ASSERT( ( face1Index == neighborEntities.template getEntityIndex< -1, 0 >() ) );
             FaceType face2( grid,
                             cell.getCoordinates() + CoordinatesType( 1, 0 ),
                             EntityOrientationType( 1, 0 ),
                             EntityBasisType( 0, 1 ) );
             IndexType face2Index = grid.template getEntityIndex( face2 );
-            CPPUNIT_ASSERT( ( face2Index == neighbourEntities.template getEntityIndex< 1, 0 >() ) );
+            CPPUNIT_ASSERT( ( face2Index == neighborEntities.template getEntityIndex< 1, 0 >() ) );
             FaceType face3( grid,
                             EntityOrientationType( 0, -1 ),
                             EntityBasisType( 1, 0 ) );
             IndexType face3Index = grid.template getEntityIndex( face3 );
-            CPPUNIT_ASSERT( ( face3Index == neighbourEntities.template getEntityIndex< 0, -1 >() ) );
+            CPPUNIT_ASSERT( ( face3Index == neighborEntities.template getEntityIndex< 0, -1 >() ) );
             FaceType face4( grid,
                             cell.getCoordinates() + CoordinatesType( 0, 1 ),
                             EntityOrientationType( 0, 1 ),
                             EntityBasisType( 1, 0 ) );
             IndexType face4Index = grid.template getEntityIndex( face4 );
-            CPPUNIT_ASSERT( ( face4Index == neighbourEntities.template getEntityIndex< 0, 1 >() ) );
+            CPPUNIT_ASSERT( ( face4Index == neighborEntities.template getEntityIndex< 0, 1 >() ) );
@@ -281,20 +281,20 @@ class GridTester< 2, RealType, Device, IndexType >: public CppUnit :: TestCase
                face.setOrientation( EntityOrientationType( 1, 0 ) );
                //const IndexType faceIndex = grid.getEntityIndex( face );
                face.refresh(); //setIndex( faceIndex );
-               auto neighbourCells = face.template getNeighbourEntities< GridType::Cell::entityDimension >();
+               auto neighborCells = face.template getNeighborEntities< GridType::Cell::entityDimension >();
                if( face.getCoordinates().x() > 0 )
                   CellType cell( grid, face.getCoordinates() + CoordinatesType( -1, 0 ) );
                   IndexType cellIndex = grid.getEntityIndex( cell );
-                  CPPUNIT_ASSERT( ( cellIndex == neighbourCells.template getEntityIndex< -1, 0 >() ) );
+                  CPPUNIT_ASSERT( ( cellIndex == neighborCells.template getEntityIndex< -1, 0 >() ) );
                if( face.getCoordinates().x() < xSize )
                   CellType cell( grid, face.getCoordinates() + CoordinatesType( 0, 0 ) );
                   IndexType cellIndex = grid.getEntityIndex( cell );
-                  CPPUNIT_ASSERT( ( cellIndex == neighbourCells.template getEntityIndex< 1, 0 >() ) );
+                  CPPUNIT_ASSERT( ( cellIndex == neighborCells.template getEntityIndex< 1, 0 >() ) );
             if( face.getCoordinates().x() < xSize )
@@ -302,19 +302,19 @@ class GridTester< 2, RealType, Device, IndexType >: public CppUnit :: TestCase
                face.setOrientation( EntityOrientationType( 0, 1 ) );
                //const IndexType faceIndex = grid.getEntityIndex( face );
                face.refresh();//setIndex( faceIndex );
-               auto neighbourCells = face.template getNeighbourEntities< GridType::Cell::entityDimension >();
+               auto neighborCells = face.template getNeighborEntities< GridType::Cell::entityDimension >();
                if( face.getCoordinates().y() > 0 )
                   CellType cell( grid, face.getCoordinates() + CoordinatesType( 0, -1 ) );
                   IndexType cellIndex = grid.getEntityIndex( cell );
-                  CPPUNIT_ASSERT( ( cellIndex == neighbourCells.template getEntityIndex< 0, -1 >() ) );
+                  CPPUNIT_ASSERT( ( cellIndex == neighborCells.template getEntityIndex< 0, -1 >() ) );
                if( face.getCoordinates().y() < ySize )
                   CellType cell( grid, face.getCoordinates() + CoordinatesType( 0, 0 ) );
                   IndexType cellIndex = grid.getEntityIndex( cell );
-                  CPPUNIT_ASSERT( ( cellIndex == neighbourCells.template getEntityIndex< 0, 1 >() ) );
+                  CPPUNIT_ASSERT( ( cellIndex == neighborCells.template getEntityIndex< 0, 1 >() ) );
diff --git a/tests/unit-tests/mesh/tnlGrid3DTester.h b/tests/unit-tests/mesh/tnlGrid3DTester.h
index 5890ff641302a5bf4bb75a580d718026d5d6b563..fed97012f5fcb2305e6ec0d3d582e7c6e8f98231 100644
--- a/tests/unit-tests/mesh/tnlGrid3DTester.h
+++ b/tests/unit-tests/mesh/tnlGrid3DTester.h
@@ -291,43 +291,43 @@ class GridTester< 3, RealType, Device, IndexType >: public CppUnit :: TestCase
                   CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( -1, 0, 0 ) );
                   const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-                  auto neighbourEntities = cell.getNeighbourEntities();
-                  CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< -1, 0, 0 >() ) );
+                  auto neighborEntities = cell.getNeighborEntities();
+                  CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< -1, 0, 0 >() ) );
                if( cell.getCoordinates().x() < xSize - 1 )
                   CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 1, 0, 0 ) );
                   const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-                  auto neighbourEntities = cell.getNeighbourEntities();
-                  CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 1, 0, 0 >() ) );
+                  auto neighborEntities = cell.getNeighborEntities();
+                  CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 1, 0, 0 >() ) );
                if( cell.getCoordinates().y() > 0 )
                   CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 0, -1, 0 ) );
                   const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-                  auto neighbourEntities = cell.getNeighbourEntities();
-                  CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 0, -1, 0 >() ) );
+                  auto neighborEntities = cell.getNeighborEntities();
+                  CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 0, -1, 0 >() ) );
                if( cell.getCoordinates().y() < ySize - 1 )
                   CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 0, 1, 0 ) );
                   const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-                  auto neighbourEntities = cell.getNeighbourEntities();
-                  CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 0, 1, 0 >() ) );
+                  auto neighborEntities = cell.getNeighborEntities();
+                  CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 0, 1, 0 >() ) );
                if( cell.getCoordinates().z() > 0 )
                   CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 0, 0, -1 ) );
                   const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-                  auto neighbourEntities = cell.getNeighbourEntities();
-                  CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 0, 0, -1 >() ) );
+                  auto neighborEntities = cell.getNeighborEntities();
+                  CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 0, 0, -1 >() ) );
                if( cell.getCoordinates().z() < zSize - 1 )
                   CellType auxCell( grid, cell.getCoordinates() + CoordinatesType( 0, 0, 1 ) );
                   const IndexType auxCellIndex = grid.getEntityIndex( auxCell );
-                  auto neighbourEntities = cell.getNeighbourEntities();
-                  CPPUNIT_ASSERT( ( auxCellIndex == neighbourEntities.template getEntityIndex< 0, 0, 1 >() ) );
+                  auto neighborEntities = cell.getNeighborEntities();
+                  CPPUNIT_ASSERT( ( auxCellIndex == neighborEntities.template getEntityIndex< 0, 0, 1 >() ) );
@@ -357,44 +357,44 @@ class GridTester< 3, RealType, Device, IndexType >: public CppUnit :: TestCase
                //const IndexType cellIndex = grid.getEntityIndex( cell );
                cell.refresh();//setIndex( cellIndex );
-               auto neighbourEntities = cell.template getNeighbourEntities< GridType::Face::entityDimension >();
+               auto neighborEntities = cell.template getNeighborEntities< GridType::Face::entityDimension >();
                face.setCoordinates( cell.getCoordinates() );
                face.setOrientation( EntityOrientationType( 1, 0, 0 ) );
                //CoordinatesType faceCoordinates( i, j, k );
                IndexType faceIndex = grid.getEntityIndex( face );
-               CPPUNIT_ASSERT( ( faceIndex == neighbourEntities.template getEntityIndex< -1, 0, 0 >() ) );
+               CPPUNIT_ASSERT( ( faceIndex == neighborEntities.template getEntityIndex< -1, 0, 0 >() ) );
                //faceCoordinates = CoordinatesType( i + 1, j, k );
                face.setCoordinates( cell.getCoordinates() + CoordinatesType( 1, 0, 0 ) );
                face.setOrientation( EntityOrientationType( 1, 0 , 0 ) );
                faceIndex = grid.getEntityIndex( face );
-               CPPUNIT_ASSERT( ( faceIndex == neighbourEntities.template getEntityIndex< 1, 0, 0 >() ) );
+               CPPUNIT_ASSERT( ( faceIndex == neighborEntities.template getEntityIndex< 1, 0, 0 >() ) );
                //faceCoordinates = CoordinatesType( i, j, k );
                face.setCoordinates( cell.getCoordinates() );
                face.setOrientation( EntityOrientationType( 0, -1, 0 ) );
                faceIndex = grid.getEntityIndex( face );
-               CPPUNIT_ASSERT( ( faceIndex == neighbourEntities.template getEntityIndex< 0, -1, 0 >() ) );
+               CPPUNIT_ASSERT( ( faceIndex == neighborEntities.template getEntityIndex< 0, -1, 0 >() ) );
                //faceCoordinates = CoordinatesType( i, j + 1, k );
                face.setCoordinates( cell.getCoordinates() + CoordinatesType( 0, 1, 0 ) );
                face.setOrientation( EntityOrientationType( 0, 1, 0 ) );
                faceIndex = grid.getEntityIndex( face );
-               CPPUNIT_ASSERT( ( faceIndex == neighbourEntities.template getEntityIndex< 0, 1, 0 >() ) );
+               CPPUNIT_ASSERT( ( faceIndex == neighborEntities.template getEntityIndex< 0, 1, 0 >() ) );
                //faceCoordinates = CoordinatesType( i, j, k );
                face.setCoordinates( cell.getCoordinates() );
                face.setOrientation( EntityOrientationType( 0, 0, -1 ) );
                faceIndex = grid.getEntityIndex( face );
-               CPPUNIT_ASSERT( ( faceIndex == neighbourEntities.template getEntityIndex< 0, 0, -1 >() ) );
+               CPPUNIT_ASSERT( ( faceIndex == neighborEntities.template getEntityIndex< 0, 0, -1 >() ) );
                //faceCoordinates = CoordinatesType( i, j, k + 1 );
                face.setCoordinates( cell.getCoordinates() + CoordinatesType( 0, 0, 1 ) );
                face.setOrientation( EntityOrientationType( 0, 0, 1 ) );
                faceIndex = grid.getEntityIndex( face );
-               CPPUNIT_ASSERT( ( faceIndex == neighbourEntities.template getEntityIndex< 0, 0, 1 >() ) );
+               CPPUNIT_ASSERT( ( faceIndex == neighborEntities.template getEntityIndex< 0, 0, 1 >() ) );
@@ -428,20 +428,20 @@ class GridTester< 3, RealType, Device, IndexType >: public CppUnit :: TestCase
                   face.setOrientation( EntityOrientationType( 1, 0, 0  ) );
                   //const IndexType faceIndex = grid.getEntityIndex( face );
                   face.refresh();//setIndex( faceIndex );
-                  auto neighbourEntities = face.template getNeighbourEntities< GridType::Cell::entityDimension >();
+                  auto neighborEntities = face.template getNeighborEntities< GridType::Cell::entityDimension >();
                   if( face.getCoordinates().x() > 0 )
                      CellType cell( grid, face.getCoordinates() + CoordinatesType( -1, 0, 0 ) );
                      IndexType cellIndex = grid.getEntityIndex( cell );
-                     CPPUNIT_ASSERT( ( cellIndex == neighbourEntities.template getEntityIndex< -1, 0, 0 >() ) );
+                     CPPUNIT_ASSERT( ( cellIndex == neighborEntities.template getEntityIndex< -1, 0, 0 >() ) );
                   if( face.getCoordinates().x() < xSize )
                      CellType cell( grid, face.getCoordinates() );
                      IndexType cellIndex = grid.getEntityIndex( cell );
-                     CPPUNIT_ASSERT( ( cellIndex == neighbourEntities.template getEntityIndex< 1, 0, 0 >() ) );
+                     CPPUNIT_ASSERT( ( cellIndex == neighborEntities.template getEntityIndex< 1, 0, 0 >() ) );
                if( face.getCoordinates().x() < xSize && face.getCoordinates().z() < zSize )
@@ -449,19 +449,19 @@ class GridTester< 3, RealType, Device, IndexType >: public CppUnit :: TestCase
                   face.setOrientation( EntityOrientationType( 0, 1, 0  ) );
                   //const IndexType faceIndex = grid.getEntityIndex( face );
                   face.refresh();//setIndex( faceIndex );
-                  auto neighbourEntities = face.template getNeighbourEntities< GridType::Cell::entityDimension >();
+                  auto neighborEntities = face.template getNeighborEntities< GridType::Cell::entityDimension >();
                   if( face.getCoordinates().y() > 0 )
                      CellType cell( grid, face.getCoordinates() + CoordinatesType( 0, -1, 0 ) );
                      IndexType cellIndex = grid.getEntityIndex( cell );
-                     CPPUNIT_ASSERT( ( cellIndex == neighbourEntities.template getEntityIndex< 0, -1, 0 >() ) );
+                     CPPUNIT_ASSERT( ( cellIndex == neighborEntities.template getEntityIndex< 0, -1, 0 >() ) );
                   if( face.getCoordinates().y() < ySize )
                      CellType cell( grid, face.getCoordinates() );
                      IndexType cellIndex = grid.getEntityIndex( cell );
-                     CPPUNIT_ASSERT( ( cellIndex == neighbourEntities.template getEntityIndex< 0, 1, 0 >() ) );
+                     CPPUNIT_ASSERT( ( cellIndex == neighborEntities.template getEntityIndex< 0, 1, 0 >() ) );
                if( face.getCoordinates().x() < xSize && face.getCoordinates().y() < ySize )
@@ -469,19 +469,19 @@ class GridTester< 3, RealType, Device, IndexType >: public CppUnit :: TestCase
                   face.setOrientation( EntityOrientationType( 0, 0, 1  ) );
                   //const IndexType faceIndex = grid.getEntityIndex( face );
                   face.refresh();//setIndex( faceIndex );
-                  auto neighbourEntities = face.template getNeighbourEntities< GridType::Cell::entityDimension >();
+                  auto neighborEntities = face.template getNeighborEntities< GridType::Cell::entityDimension >();
                   if( face.getCoordinates().z() > 0 )
                      CellType cell( grid, face.getCoordinates() + CoordinatesType( 0, 0, -1 ) );
                      IndexType cellIndex = grid.getEntityIndex( cell );
-                     CPPUNIT_ASSERT( ( cellIndex == neighbourEntities.template getEntityIndex< 0, 0, -1 >() ) );
+                     CPPUNIT_ASSERT( ( cellIndex == neighborEntities.template getEntityIndex< 0, 0, -1 >() ) );
                   if( face.getCoordinates().z() < zSize )
                      CellType cell( grid, face.getCoordinates() );
                      IndexType cellIndex = grid.getEntityIndex( cell );
-                     CPPUNIT_ASSERT( ( cellIndex == neighbourEntities.template getEntityIndex< 0, 0, 1 >() ) );
+                     CPPUNIT_ASSERT( ( cellIndex == neighborEntities.template getEntityIndex< 0, 0, 1 >() ) );
diff --git a/tests/unit-tests/operators/CMakeLists.txt b/tests/unit-tests/operators/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/unit-tests/operators/diffusion/CMakeLists.txt b/tests/unit-tests/operators/diffusion/CMakeLists.txt
index 5ad9a0af6f04b4798a5075bd6cd22b6191dd05a0..5d09ac10a1aeb9daca61a35f5075c8b60c1fad5d 100644
--- a/tests/unit-tests/operators/diffusion/CMakeLists.txt
+++ b/tests/unit-tests/operators/diffusion/CMakeLists.txt
@@ -10,8 +10,7 @@ if( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( tnlLinearDiffusionTest-cuda${mpiExt}${debugExt} ${headers} tnlLinearDiffusionTest.cu )
    TARGET_LINK_LIBRARIES( tnlLinearDiffusionTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                                           tnl${mpiExt}${debugExt}-0.1 )
-   CUDA_ADD_EXECUTABLE( tnlOneSidedMeanCurvatureTest-cuda${mpiExt}${debugExt} ${headers} tnlOneSidedMeanCurvatureTest.cu
-                        OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} )
+   CUDA_ADD_EXECUTABLE( tnlOneSidedMeanCurvatureTest-cuda${mpiExt}${debugExt} ${headers} tnlOneSidedMeanCurvatureTest.cu )
    TARGET_LINK_LIBRARIES( tnlOneSidedMeanCurvatureTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                                           tnl${mpiExt}${debugExt}-0.1 )
diff --git a/tests/unit-tests/operators/fdm/CMakeLists.txt b/tests/unit-tests/operators/fdm/CMakeLists.txt
old mode 100755
new mode 100644
index a85d068b485abd21f1c29e27746c722684d9929e..ad2395de3c40fc3c5c49221a1b023e744f74fceb
--- a/tests/unit-tests/operators/fdm/CMakeLists.txt
+++ b/tests/unit-tests/operators/fdm/CMakeLists.txt
@@ -3,8 +3,7 @@ TARGET_LINK_LIBRARIES( tnlFiniteDifferencesTest${mpiExt}${debugExt} ${CPPUNIT_LI
                                                                  tnl${mpiExt}${debugExt}-0.1 )
 if( BUILD_CUDA )                                                           
-   CUDA_ADD_EXECUTABLE( tnlFiniteDifferencesTest-cuda${mpiExt}${debugExt} ${headers} tnlFiniteDifferencesTest.cu
-                        OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} )
+   CUDA_ADD_EXECUTABLE( tnlFiniteDifferencesTest-cuda${mpiExt}${debugExt} ${headers} tnlFiniteDifferencesTest.cu )
    TARGET_LINK_LIBRARIES( tnlFiniteDifferencesTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                                           tnl${mpiExt}${debugExt}-0.1 )
diff --git a/tests/unit-tests/operators/geometric/CMakeLists.txt b/tests/unit-tests/operators/geometric/CMakeLists.txt
index 71d62e19415fc60329923ad0e8a8247c82286800..dac9802609b798141ae4c1ed7fbc8b5ce01f088e 100644
--- a/tests/unit-tests/operators/geometric/CMakeLists.txt
+++ b/tests/unit-tests/operators/geometric/CMakeLists.txt
@@ -11,20 +11,15 @@ TARGET_LINK_LIBRARIES( tnlCoFVMGradientNormTest${mpiExt}${debugExt} ${CPPUNIT_LI
                                                                  tnl${mpiExt}${debugExt}-0.1 )
 if( BUILD_CUDA )                                                           
-   CUDA_ADD_EXECUTABLE( tnlFDMGradientNormTest-cuda${mpiExt}${debugExt} ${headers} tnlFDMGradientNormTest.cu
-                        OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} )
+   CUDA_ADD_EXECUTABLE( tnlFDMGradientNormTest-cuda${mpiExt}${debugExt} ${headers} tnlFDMGradientNormTest.cu )
    TARGET_LINK_LIBRARIES( tnlFDMGradientNormTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                                           tnl${mpiExt}${debugExt}-0.1 )
-   CUDA_ADD_EXECUTABLE( tnlTwoSidedGradientNormTest-cuda${mpiExt}${debugExt} ${headers} tnlTwoSidedGradientNormTest.cu
-                        OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} )
+   CUDA_ADD_EXECUTABLE( tnlTwoSidedGradientNormTest-cuda${mpiExt}${debugExt} ${headers} tnlTwoSidedGradientNormTest.cu )
    TARGET_LINK_LIBRARIES( tnlTwoSidedGradientNormTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                                           tnl${mpiExt}${debugExt}-0.1 )
-   CUDA_ADD_EXECUTABLE( tnlCoFVMGradientNormTest-cuda${mpiExt}${debugExt} ${headers} tnlCoFVMGradientNormTest.cu
-                        OPTIONS ${CUDA_ADD_EXECUTABLE_OPTIONS} )
+   CUDA_ADD_EXECUTABLE( tnlCoFVMGradientNormTest-cuda${mpiExt}${debugExt} ${headers} tnlCoFVMGradientNormTest.cu )
    TARGET_LINK_LIBRARIES( tnlCoFVMGradientNormTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                                           tnl${mpiExt}${debugExt}-0.1 )
diff --git a/tests/unit-tests/solver/CMakeLists.txt b/tests/unit-tests/solver/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/tests/unit-tests/solver/pde/CMakeLists.txt b/tests/unit-tests/solver/pde/CMakeLists.txt
old mode 100755
new mode 100644