From 8458e9cfd922150be3a37cb8311f9a6fff5bb742 Mon Sep 17 00:00:00 2001
From: Tomas Sobotik <sobotto4@fjfi.cvut.cz>
Date: Tue, 12 Apr 2016 13:40:18 +0200
Subject: [PATCH] fast-sweeping-map solver added, narrow band in progress.

---
 examples/CMakeLists.txt                       |    1 +
 examples/fast-sweeping-map/CMakeLists.txt     |   22 +
 examples/fast-sweeping-map/MainBuildConfig.h  |   64 +
 .../fast-sweeping-map/fastSweepingMapConfig.h |   39 +
 examples/fast-sweeping-map/main.cpp           |   17 +
 examples/fast-sweeping-map/main.cu            |   17 +
 examples/fast-sweeping-map/main.h             |   88 ++
 .../fast-sweeping-map/tnlFastSweepingMap.h    |  186 +++
 .../tnlFastSweepingMap2D_CUDA_v4_impl.h       | 1035 +++++++++++++++
 .../tnlFastSweepingMap2D_impl.h               |  927 ++++++++++++++
 .../tnlFastSweepingMap_CUDA.h                 |  196 +++
 examples/narrow-band/CMakeLists.txt           |   22 +
 examples/narrow-band/MainBuildConfig.h        |   64 +
 examples/narrow-band/main.cpp                 |   17 +
 examples/narrow-band/main.cu                  |   17 +
 examples/narrow-band/main.h                   |   88 ++
 examples/narrow-band/narrowBandConfig.h       |   40 +
 examples/narrow-band/tnlNarrowBand.h          |  186 +++
 .../tnlNarrowBand2D_CUDA_v4_impl.h            | 1109 +++++++++++++++++
 examples/narrow-band/tnlNarrowBand2D_impl.h   |  927 ++++++++++++++
 .../narrow-band/tnlNarrowBand3D_CUDA_impl.h   |  961 ++++++++++++++
 examples/narrow-band/tnlNarrowBand3D_impl.h   |  307 +++++
 examples/narrow-band/tnlNarrowBand_CUDA.h     |  192 +++
 23 files changed, 6522 insertions(+)
 create mode 100755 examples/fast-sweeping-map/CMakeLists.txt
 create mode 100644 examples/fast-sweeping-map/MainBuildConfig.h
 create mode 100644 examples/fast-sweeping-map/fastSweepingMapConfig.h
 create mode 100644 examples/fast-sweeping-map/main.cpp
 create mode 100644 examples/fast-sweeping-map/main.cu
 create mode 100644 examples/fast-sweeping-map/main.h
 create mode 100644 examples/fast-sweeping-map/tnlFastSweepingMap.h
 create mode 100644 examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
 create mode 100644 examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
 create mode 100644 examples/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
 create mode 100755 examples/narrow-band/CMakeLists.txt
 create mode 100644 examples/narrow-band/MainBuildConfig.h
 create mode 100644 examples/narrow-band/main.cpp
 create mode 100644 examples/narrow-band/main.cu
 create mode 100644 examples/narrow-band/main.h
 create mode 100644 examples/narrow-band/narrowBandConfig.h
 create mode 100644 examples/narrow-band/tnlNarrowBand.h
 create mode 100644 examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
 create mode 100644 examples/narrow-band/tnlNarrowBand2D_impl.h
 create mode 100644 examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
 create mode 100644 examples/narrow-band/tnlNarrowBand3D_impl.h
 create mode 100644 examples/narrow-band/tnlNarrowBand_CUDA.h

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index ee2a49b1ee..c633cf9936 100755
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -5,3 +5,4 @@ add_subdirectory( navier-stokes )
 add_subdirectory( hamilton-jacobi-parallel )
 add_subdirectory( fast-sweeping )
 add_subdirectory( hamilton-jacobi-parallel-map )
+add_subdirectory( fast-sweeping-map )
diff --git a/examples/fast-sweeping-map/CMakeLists.txt b/examples/fast-sweeping-map/CMakeLists.txt
new file mode 100755
index 0000000000..73cc05e264
--- /dev/null
+++ b/examples/fast-sweeping-map/CMakeLists.txt
@@ -0,0 +1,22 @@
+set( tnl_fast_sweeping_map_SOURCES
+#     MainBuildConfig.h
+#     tnlFastSweepingMap2D_impl.h
+#     tnlFastSweepingMap.h
+#     fastSweepingMapConfig.h 
+     main.cpp)
+
+
+IF(  BUILD_CUDA ) 
+	CUDA_ADD_EXECUTABLE(fast-sweeping-map${debugExt} main.cu)
+ELSE(  BUILD_CUDA )                
+	ADD_EXECUTABLE(fast-sweeping-map${debugExt} main.cpp)
+ENDIF( BUILD_CUDA )
+target_link_libraries (fast-sweeping-map${debugExt} tnl${debugExt}-${tnlVersion} )
+
+
+INSTALL( TARGETS fast-sweeping-map${debugExt}
+         RUNTIME DESTINATION bin
+         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
+        
+#INSTALL( FILES ${tnl_fast_sweeping_map_SOURCES}
+#         DESTINATION share/tnl-${tnlVersion}/examples/fast-sweeping-map )
diff --git a/examples/fast-sweeping-map/MainBuildConfig.h b/examples/fast-sweeping-map/MainBuildConfig.h
new file mode 100644
index 0000000000..1b904c0c8b
--- /dev/null
+++ b/examples/fast-sweeping-map/MainBuildConfig.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+                          MainBuildConfig.h  -  description
+                             -------------------
+    begin                : Jul 7, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MAINBUILDCONFIG_H_
+#define MAINBUILDCONFIG_H_
+
+#include <solvers/tnlBuildConfigTags.h>
+
+class MainBuildConfig
+{
+   public:
+
+      static void print() { cerr << "MainBuildConfig" << endl; }
+};
+
+/****
+ * Turn off support for float and long double.
+ */
+template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
+template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
+
+/****
+ * Turn off support for short int and long int indexing.
+ */
+template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
+template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
+
+/****
+ * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
+ */
+template< int Dimensions, typename Real, typename Device, typename Index >
+   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
+      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
+                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
+                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
+                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
+
+/****
+ * Please, chose your preferred time discretisation  here.
+ */
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
+
+/****
+ * Only the Runge-Kutta-Merson solver is enabled by default.
+ */
+template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
+
+#endif /* MAINBUILDCONFIG_H_ */
diff --git a/examples/fast-sweeping-map/fastSweepingMapConfig.h b/examples/fast-sweeping-map/fastSweepingMapConfig.h
new file mode 100644
index 0000000000..c00b9fafce
--- /dev/null
+++ b/examples/fast-sweeping-map/fastSweepingMapConfig.h
@@ -0,0 +1,39 @@
+/***************************************************************************
+                          fastSweepingConfig.h  -  description
+                             -------------------
+    begin                : Oct 15, 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+    email                :
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef FASTSWEEPINGCONFIG_H_
+#define FASTSWEEPINGCONFIG_H_
+
+#include <config/tnlConfigDescription.h>
+
+template< typename ConfigTag >
+class fastSweepingMapConfig
+{
+   public:
+      static void configSetup( tnlConfigDescription& config )
+      {
+         config.addDelimiter( "Parallel Eikonal solver settings:" );
+         config.addEntry        < tnlString > ( "problem-name", "This defines particular problem.", "fast-sweeping" );
+         config.addRequiredEntry        < tnlString > ( "initial-condition", "Initial condition for solver");
+         config.addRequiredEntry        < int > ( "dim", "Dimension of problem.");
+         config.addEntry       < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" );
+         config.addEntry       < tnlString > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" );
+         config.addRequiredEntry        < tnlString > ( "map", "Gradient map for solver");
+      }
+};
+
+#endif /* FASTSWEEPINGCONFIG_H_ */
diff --git a/examples/fast-sweeping-map/main.cpp b/examples/fast-sweeping-map/main.cpp
new file mode 100644
index 0000000000..8849008ff6
--- /dev/null
+++ b/examples/fast-sweeping-map/main.cpp
@@ -0,0 +1,17 @@
+/***************************************************************************
+                          main.cpp  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "main.h"
diff --git a/examples/fast-sweeping-map/main.cu b/examples/fast-sweeping-map/main.cu
new file mode 100644
index 0000000000..8849008ff6
--- /dev/null
+++ b/examples/fast-sweeping-map/main.cu
@@ -0,0 +1,17 @@
+/***************************************************************************
+                          main.cpp  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "main.h"
diff --git a/examples/fast-sweeping-map/main.h b/examples/fast-sweeping-map/main.h
new file mode 100644
index 0000000000..4aee944c0f
--- /dev/null
+++ b/examples/fast-sweeping-map/main.h
@@ -0,0 +1,88 @@
+/***************************************************************************
+                          main.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+
+#include "MainBuildConfig.h"
+	//for HOST versions:
+//#include "tnlFastSweepingMap.h"
+	//for DEVICE versions:
+#include "tnlFastSweepingMap_CUDA.h"
+#include "fastSweepingMapConfig.h"
+#include <solvers/tnlBuildConfigTags.h>
+
+#include <mesh/tnlGrid.h>
+#include <core/tnlDevice.h>
+#include <time.h>
+#include <ctime>
+
+typedef MainBuildConfig BuildConfig;
+
+int main( int argc, char* argv[] )
+{
+	time_t start;
+	time_t stop;
+	time(&start);
+	std::clock_t start2= std::clock();
+   tnlParameterContainer parameters;
+   tnlConfigDescription configDescription;
+   fastSweepingMapConfig< BuildConfig >::configSetup( configDescription );
+
+   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
+      return false;
+
+   const int& dim = parameters.getParameter< int >( "dim" );
+
+   if(dim == 2)
+   {
+		tnlFastSweepingMap<tnlGrid<2,double,tnlHost, int>, double, int> solver;
+		if(!solver.init(parameters))
+	   {
+			cerr << "Solver failed to initialize." << endl;
+			return EXIT_FAILURE;
+	   }
+		checkCudaDevice;
+	   cout << "-------------------------------------------------------------" << endl;
+	   cout << "Starting solver..." << endl;
+	   solver.run();
+   }
+//   else if(dim == 3)
+//   {
+//		tnlFastSweepingMap<tnlGrid<3,double,tnlHost, int>, double, int> solver;
+//		if(!solver.init(parameters))
+//	   {
+//			cerr << "Solver failed to initialize." << endl;
+//			return EXIT_FAILURE;
+//	   }
+//		checkCudaDevice;
+//	   cout << "-------------------------------------------------------------" << endl;
+//	   cout << "Starting solver..." << endl;
+//	   solver.run();
+//   }
+   else
+   {
+	   cerr << "Unsupported number of dimensions: " << dim << "!" << endl;
+	   return EXIT_FAILURE;
+   }
+
+
+   time(&stop);
+   cout << "Solver stopped..." << endl;
+   cout << endl;
+   cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) << endl;
+   return EXIT_SUCCESS;
+}
+
+
diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap.h b/examples/fast-sweeping-map/tnlFastSweepingMap.h
new file mode 100644
index 0000000000..32d9537a82
--- /dev/null
+++ b/examples/fast-sweeping-map/tnlFastSweepingMap.h
@@ -0,0 +1,186 @@
+/***************************************************************************
+                          tnlFastSweepingMap.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLFASTSWEEPING_H_
+#define TNLFASTSWEEPING_H_
+
+#include <config/tnlParameterContainer.h>
+#include <core/vectors/tnlVector.h>
+#include <core/vectors/tnlStaticVector.h>
+#include <functions/tnlMeshFunction.h>
+#include <core/tnlHost.h>
+#include <mesh/tnlGrid.h>
+#include <mesh/grids/tnlGridEntity.h>
+#include <limits.h>
+#include <core/tnlDevice.h>
+#include <ctime>
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#endif
+
+
+
+
+template< typename Mesh,
+		  typename Real,
+		  typename Index >
+class tnlFastSweepingMap
+{};
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 2, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+
+	tnlFastSweepingMap();
+
+	static tnlString getType();
+	bool init( const tnlParameterContainer& parameters );
+
+	bool initGrid();
+	bool run();
+
+	//for single core version use this implementation:
+	void updateValue(const Index i, const Index j);
+	//for parallel version use this one instead:
+//	void updateValue(const Index i, const Index j, DofVectorType* grid);
+
+
+	void setupSquare1000(Index i, Index j);
+	void setupSquare1100(Index i, Index j);
+	void setupSquare1010(Index i, Index j);
+	void setupSquare1001(Index i, Index j);
+	void setupSquare1110(Index i, Index j);
+	void setupSquare1101(Index i, Index j);
+	void setupSquare1011(Index i, Index j);
+	void setupSquare1111(Index i, Index j);
+	void setupSquare0000(Index i, Index j);
+	void setupSquare0100(Index i, Index j);
+	void setupSquare0010(Index i, Index j);
+	void setupSquare0001(Index i, Index j);
+	void setupSquare0110(Index i, Index j);
+	void setupSquare0101(Index i, Index j);
+	void setupSquare0011(Index i, Index j);
+	void setupSquare0111(Index i, Index j);
+
+	Real fabsMin(const Real x, const Real y);
+
+
+protected:
+
+	MeshType Mesh;
+
+	bool exactInput;
+
+	tnlMeshFunction<MeshType> dofVector, dofVector2;
+	DofVectorType data;
+
+	RealType h;
+
+	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
+
+
+#ifdef HAVE_OPENMP
+//	omp_lock_t* gridLock;
+#endif
+
+
+};
+
+
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 3, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+	tnlFastSweepingMap();
+
+	static tnlString getType();
+	bool init( const tnlParameterContainer& parameters );
+
+	bool initGrid();
+	bool run();
+
+	//for single core version use this implementation:
+	void updateValue(const Index i, const Index j, const Index k);
+	//for parallel version use this one instead:
+//	void updateValue(const Index i, const Index j, DofVectorType* grid);
+
+	Real fabsMin(const Real x, const Real y);
+
+
+protected:
+
+	MeshType Mesh;
+
+	bool exactInput;
+
+
+	tnlMeshFunction<MeshType> dofVector, dofVector2;
+	DofVectorType data;
+
+	RealType h;
+
+	tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity;
+
+#ifdef HAVE_OPENMP
+//	omp_lock_t* gridLock;
+#endif
+
+
+};
+
+
+	//for single core version use this implementation:
+#include "tnlFastSweepingMap2D_impl.h"
+	//for parallel version use this one instead:
+// #include "tnlFastSweepingMap2D_openMP_impl.h"
+
+//											#include "tnlFastSweepingMap3D_impl.h"
+
+#endif /* TNLFASTSWEEPING_H_ */
diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
new file mode 100644
index 0000000000..00e83fa402
--- /dev/null
+++ b/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
@@ -0,0 +1,1035 @@
+/***************************************************************************
+                          tnlFastSweepingMap2D_CUDA_v4_impl.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLFASTSWEEPING2D_IMPL_H_
+#define TNLFASTSWEEPING2D_IMPL_H_
+
+#include "tnlFastSweepingMap.h"
+
+#define MAP_SOLVER_MAX_VALUE 150
+
+__device__
+double fabsMin( double x, double y)
+{
+	double fx = abs(x);
+
+	if(Min(fx,abs(y)) == fx)
+		return x;
+	else
+		return y;
+}
+
+__device__
+double atomicFabsMin(double* address, double val)
+{
+	unsigned long long int* address_as_ull =
+						  (unsigned long long int*)address;
+	unsigned long long int old = *address_as_ull, assumed;
+	do {
+		assumed = old;
+			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
+	} while (assumed != old);
+	return __longlong_as_double(old);
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlString tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+	   return tnlString( "tnlFastSweepingMap< " ) +
+	          MeshType::getType() + ", " +
+	          ::getType< Real >() + ", " +
+	          ::getType< Index >() + " >";
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap()
+:dofVector(Mesh)
+{
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+	const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+
+	if( ! Mesh.load( meshFile ) )
+	{
+		   cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	if( ! dofVector.load( initialCondition ) )
+	{
+		   cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+	const tnlString& mapFile = parameters.getParameter <tnlString>("map");
+	if(! this->map.load( mapFile ))
+		cout << "Failed to load map file : " << mapFile << endl;
+
+	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
+	//Entity.refresh();
+	counter = 0;
+
+	const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );
+
+	if(exact_input == "no")
+		exactInput=false;
+	else
+		exactInput=true;
+
+
+#ifdef HAVE_CUDA
+
+	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
+	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
+	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+	cudaMalloc(&(map_cuda), this->map.getSize()*sizeof(double));
+	cudaMemcpy(map_cuda, this->map.getData(), this->map.getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+	cudaMalloc(&(changed), sizeof(int));
+	//counter == 0 --> setting changed to 0
+	cudaMemcpy(changed, &counter, sizeof(int), cudaMemcpyHostToDevice);
+
+
+	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
+	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
+
+#endif
+
+	int n = Mesh.getDimensions().x();
+	dim3 threadsPerBlock(16, 16);
+	dim3 numBlocks(n/16 + 1 ,n/16 +1);
+
+
+	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+	return true;
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
+{
+
+	int n = Mesh.getDimensions().x();
+	dim3 threadsPerBlock(1, 512);
+	dim3 numBlocks(4,1);
+
+	int run = 1;
+	int zero = 0;
+
+	while(run != 0)
+	{
+		cudaMemcpy(this->changed, &zero, sizeof(int), cudaMemcpyHostToDevice);
+		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0, this->changed);
+		cudaMemcpy(&run, this->changed,sizeof(int), cudaMemcpyDeviceToHost);
+	}
+
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+	//data.setLike(dofVector.getData());
+	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+	cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+	cudaDeviceSynchronize();
+	cudaFree(cudaDofVector);
+	cudaFree(cudaDofVector2);
+	cudaFree(cudaSolver);
+	//data.save("u-00001.tnl");
+	dofVector.save("u-00001.tnl");
+	cudaDeviceSynchronize();
+	return true;
+}
+
+
+
+
+#ifdef HAVE_CUDA
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index* something_changed)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+
+	if(map_cuda[Entity.getIndex()] != 0.0)
+	{
+		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		Real value = cudaDofVector2[Entity.getIndex()];
+		Real im = 1.0/map_cuda[Entity.getIndex()];
+		Real a,b, tmp;
+
+		if( i == 0 )
+			a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		else if( i == Mesh.getDimensions().x() - 1 )
+			a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		else
+		{
+			a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
+					 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+		}
+
+		if( j == 0 )
+			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		else if( j == Mesh.getDimensions().y() - 1 )
+			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		else
+		{
+			b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
+					 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+		}
+
+
+		if(abs(a-b) >= im*h)
+			tmp = fabsMin(a,b) + Sign(value)*im*h;
+		else
+			tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) );
+
+	//	cudaDofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
+		atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp);
+
+		if(abs(abs(value)-abs(tmp)) > 0.1*h)
+			atomicMax(something_changed,1);
+	}
+	else
+	{
+		atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), MAP_SOLVER_MAX_VALUE);
+	}
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
+{
+	int i = threadIdx.x + blockDim.x*blockIdx.x;
+	int j = blockDim.y*blockIdx.y + threadIdx.y;
+
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+
+	int gid = Entity.getIndex();
+
+	cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]);
+//
+//	if(abs(cudaDofVector[gid]) < 1.01*h)
+//		cudaDofVector2[gid] = cudaDofVector[gid];
+
+
+
+
+
+	if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() )
+	{
+		if(cudaDofVector[Entity.getIndex()] > 0)
+		{
+			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1111(i,j);
+					else
+						setupSquare1110(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1101(i,j);
+					else
+						setupSquare1100(i,j);
+				}
+			}
+			else
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1011(i,j);
+					else
+						setupSquare1010(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1001(i,j);
+					else
+						setupSquare1000(i,j);
+				}
+			}
+		}
+		else
+		{
+			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0111(i,j);
+					else
+						setupSquare0110(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0101(i,j);
+					else
+						setupSquare0100(i,j);
+				}
+			}
+			else
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0011(i,j);
+					else
+						setupSquare0010(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0001(i,j);
+					else
+						setupSquare0000(i,j);
+				}
+			}
+		}
+
+	}
+
+	return true;
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
+{
+	Real fx = abs(x);
+	//Real fy = abs(y);
+
+	//Real tmpMin = Min(fx,abs(y));
+
+	if(Min(fx,abs(y)) == fx)
+		return x;
+	else
+		return y;
+
+
+}
+
+
+
+__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i, int* changed)
+{
+
+	__shared__ int something_changed;
+	if(threadIdx.x+threadIdx.y == 0)
+		something_changed = 0;
+
+	int gx = 0;
+	int gy = threadIdx.y;
+	//if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
+	//	return;
+	int n = solver->Mesh.getDimensions().x();
+	int blockCount = n/blockDim.y +1;
+	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
+	//int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
+
+	//int id1 = gx+gy;
+	//int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
+
+	if(blockIdx.x==0)
+	{
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy,&something_changed);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+	else if(blockIdx.x==1)
+	{
+		gx=n-1;
+		gy=threadIdx.y;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy,&something_changed);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+	else if(blockIdx.x==2)
+	{
+		gx=0;
+		gy=n-threadIdx.y-1;
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy,&something_changed);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+	else if(blockIdx.x==3)
+	{
+		gx=n-1;
+		gy=n-threadIdx.y-1;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy,&something_changed);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+
+
+	if(threadIdx.x+threadIdx.y == 0)
+		atomicMax(changed, something_changed);
+
+
+
+
+}
+
+
+__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+{
+
+
+	int gx = threadIdx.x + blockDim.x*blockIdx.x;
+	int gy = blockDim.y*blockIdx.y + threadIdx.y;
+
+
+	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
+	{
+		solver->initGrid();
+	}
+
+
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+}
+#endif
+
+
+
+
+#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
new file mode 100644
index 0000000000..edb798b2d6
--- /dev/null
+++ b/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
@@ -0,0 +1,927 @@
+/***************************************************************************
+                          tnlFastSweepingMap2D_impl.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLFASTSWEEPING2D_IMPL_H_
+#define TNLFASTSWEEPING2D_IMPL_H_
+
+#include "tnlFastSweepingMap.h"
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlString tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+	   return tnlString( "tnlFastSweepingMap< " ) +
+	          MeshType::getType() + ", " +
+	          ::getType< Real >() + ", " +
+	          ::getType< Index >() + " >";
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap()
+:Entity(Mesh),
+ dofVector(Mesh),
+ dofVector2(Mesh)
+{
+}
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+	const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+
+	if( ! Mesh.load( meshFile ) )
+	{
+		   cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	if( ! dofVector.load( initialCondition ) )
+	{
+		   cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
+		   return false;
+	}
+	dofVector2.load(initialCondition);
+
+	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
+	Entity.refresh();
+
+	const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );
+
+	if(exact_input == "no")
+		exactInput=false;
+	else
+		exactInput=true;
+
+	cout << "a" << endl;
+	return initGrid();
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
+{
+
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
+	{
+		dofVector2[i]=INT_MAX*Sign(dofVector[i]);
+	}
+
+	for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++)
+	{
+		for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++)
+			{
+			this->Entity.setCoordinates(CoordinatesType(i,j));
+			this->Entity.refresh();
+			neighbourEntities.refresh(Mesh,Entity.getIndex());
+
+				if(dofVector[this->Entity.getIndex()] > 0)
+				{
+					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1111(i,j);
+							else
+								setupSquare1110(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1101(i,j);
+							else
+								setupSquare1100(i,j);
+						}
+					}
+					else
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1011(i,j);
+							else
+								setupSquare1010(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1001(i,j);
+							else
+								setupSquare1000(i,j);
+						}
+					}
+				}
+				else
+				{
+					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0111(i,j);
+							else
+								setupSquare0110(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0101(i,j);
+							else
+								setupSquare0100(i,j);
+						}
+					}
+					else
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0011(i,j);
+							else
+								setupSquare0010(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0001(i,j);
+							else
+								setupSquare0000(i,j);
+						}
+					}
+				}
+
+			}
+	}
+	cout << "a" << endl;
+
+//	Real tmp = 0.0;
+//	Real ax=0.5/sqrt(2.0);
+//
+//	if(!exactInput)
+//	{
+//		for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++)
+//				dofVector[i]=0.5*h*Sign(dofVector[i]);
+//	}
+//
+//
+//	for(Index i = 1; i < Mesh.getDimensions().x()-1; i++)
+//	{
+//		for(Index j = 1; j < Mesh.getDimensions().y()-1; j++)
+//		{
+//			 tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//			if(tmp == 0.0)
+//			{}
+//			else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//					dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
+//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//			{}
+//			else
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//		}
+//	}
+//
+//
+//
+//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
+//	{
+//		Index j = 0;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
+//	{
+//		Index j = Mesh.getDimensions().y() - 1;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
+//	{
+//		Index i = 0;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
+//	{
+//		Index i = Mesh.getDimensions().x() - 1;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//
+//	Index i = Mesh.getDimensions().x() - 1;
+//	Index j = Mesh.getDimensions().y() - 1;
+//
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//
+//
+//
+//	j = 0;
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//
+//
+//
+//	i = 0;
+//	j = Mesh.getDimensions().y() -1;
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//
+//
+//
+//	j = 0;
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+
+	//data.setLike(dofVector2.getData());
+	//data=dofVector2.getData();
+	//cout << data.getType() << endl;
+	dofVector2.save("u-00000.tnl");
+	//dofVector2.getData().save("u-00000.tnl");
+
+	return true;
+}
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
+{
+
+	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+	{
+		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+	{
+		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+	{
+		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+	{
+		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+
+//	data.setLike(dofVector2.getData());
+//	data = dofVector2.getData();
+//	cout << data.getType() << endl;
+	dofVector2.save("u-00001.tnl");
+	//dofVector2.getData().save("u-00001.tnl");
+
+	return true;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
+{
+
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+
+	Real value = dofVector2[Entity.getIndex()];
+	Real a,b, tmp;
+
+	if( i == 0 )
+		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+	else if( i == Mesh.getDimensions().x() - 1 )
+		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+	else
+	{
+		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+	}
+
+	if( j == 0 )
+		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+	else if( j == Mesh.getDimensions().y() - 1 )
+		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+	else
+	{
+		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+	}
+
+
+	if(fabs(a-b) >= h)
+		tmp = fabsMin(a,b) + Sign(value)*h;
+	else
+		tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
+
+
+	dofVector2[Entity.getIndex()] = fabsMin(value, tmp);
+
+//	if(dofVector2[Entity.getIndex()] > 1.0)
+//		cout << value << "    " << tmp << " " << dofVector2[Entity.getIndex()] << endl;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
+{
+	Real fx = fabs(x);
+	Real fy = fabs(y);
+
+	Real tmpMin = Min(fx,fy);
+
+	if(tmpMin == fx)
+		return x;
+	else
+		return y;
+
+}
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
+{
+//	this->Entity.setCoordinates(CoordinatesType(i,j));
+//	this->Entity.refresh();
+//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
+{
+//	this->Entity.setCoordinates(CoordinatesType(i,j));
+//	this->Entity.refresh();
+//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+}
+
+
+
+
+#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap_CUDA.h b/examples/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
new file mode 100644
index 0000000000..6c8b553231
--- /dev/null
+++ b/examples/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
@@ -0,0 +1,196 @@
+/***************************************************************************
+                          tnlFastSweepingMap_CUDA.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLFASTSWEEPING_H_
+#define TNLFASTSWEEPING_H_
+
+#include <config/tnlParameterContainer.h>
+#include <core/vectors/tnlVector.h>
+#include <core/vectors/tnlStaticVector.h>
+#include <core/tnlHost.h>
+#include <mesh/tnlGrid.h>
+#include <mesh/grids/tnlGridEntity.h>
+
+#include <functions/tnlMeshFunction.h>
+#include <limits.h>
+#include <core/tnlDevice.h>
+#include <ctime>
+
+
+
+
+
+template< typename Mesh,
+		  typename Real,
+		  typename Index >
+class tnlFastSweepingMap
+{};
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 2, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+	tnlFastSweepingMap();
+
+	__host__ static tnlString getType();
+	__host__ bool init( const tnlParameterContainer& parameters );
+	__host__ bool run();
+
+#ifdef HAVE_CUDA
+	__device__ bool initGrid();
+	__device__ void updateValue(const Index i, const Index j, Index* something_changed);
+	__device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3);
+	__device__ Real fabsMin(const Real x, const Real y);
+
+	tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
+	double* cudaDofVector;
+	double* cudaDofVector2;
+	double* map_cuda;
+	int counter;
+	int* changed;
+	__device__ void setupSquare1000(Index i, Index j);
+	__device__ void setupSquare1100(Index i, Index j);
+	__device__ void setupSquare1010(Index i, Index j);
+	__device__ void setupSquare1001(Index i, Index j);
+	__device__ void setupSquare1110(Index i, Index j);
+	__device__ void setupSquare1101(Index i, Index j);
+	__device__ void setupSquare1011(Index i, Index j);
+	__device__ void setupSquare1111(Index i, Index j);
+	__device__ void setupSquare0000(Index i, Index j);
+	__device__ void setupSquare0100(Index i, Index j);
+	__device__ void setupSquare0010(Index i, Index j);
+	__device__ void setupSquare0001(Index i, Index j);
+	__device__ void setupSquare0110(Index i, Index j);
+	__device__ void setupSquare0101(Index i, Index j);
+	__device__ void setupSquare0011(Index i, Index j);
+	__device__ void setupSquare0111(Index i, Index j);
+#endif
+
+	MeshType Mesh;
+
+protected:
+
+
+
+	bool exactInput;
+
+	tnlMeshFunction<MeshType> dofVector;
+	DofVectorType data, map;
+
+
+	RealType h;
+
+
+};
+
+
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 3, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+
+
+	__host__ static tnlString getType();
+	__host__ bool init( const tnlParameterContainer& parameters );
+	__host__ bool run();
+
+#ifdef HAVE_CUDA
+	__device__ bool initGrid(int i, int j, int k);
+	__device__ void updateValue(const Index i, const Index j, const Index k);
+	__device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3);
+	__device__ Real fabsMin(const Real x, const Real y);
+
+	tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
+	double* cudaDofVector;
+	double* cudaDofVector2;
+	int counter;
+#endif
+
+	MeshType Mesh;
+
+protected:
+
+
+
+	bool exactInput;
+
+	tnlMeshFunction<MeshType> dofVector;
+	DofVectorType data;
+
+	RealType h;
+
+
+};
+
+
+
+
+
+
+
+#ifdef HAVE_CUDA
+//template<int sweep_t>
+__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i, int* changed);
+//__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i);
+
+__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
+//__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 3,double, tnlHost, int >, double, int >* solver);
+#endif
+
+/*various implementtions.... choose one*/
+//#include "tnlFastSweepingMap2D_CUDA_impl.h"
+//#include "tnlFastSweepingMap2D_CUDA_v2_impl.h"
+//#include "tnlFastSweepingMap2D_CUDA_v3_impl.h"
+#include "tnlFastSweepingMap2D_CUDA_v4_impl.h"
+//#include "tnlFastSweepingMap2D_CUDA_v5_impl.h"
+
+
+//															#include "tnlFastSweepingMap3D_CUDA_impl.h"
+
+#endif /* TNLFASTSWEEPING_H_ */
diff --git a/examples/narrow-band/CMakeLists.txt b/examples/narrow-band/CMakeLists.txt
new file mode 100755
index 0000000000..7a2963cc68
--- /dev/null
+++ b/examples/narrow-band/CMakeLists.txt
@@ -0,0 +1,22 @@
+set( tnl_fast_sweeping_SOURCES
+#     MainBuildConfig.h
+#     tnlFastSweeping2D_impl.h
+#     tnlFastSweeping.h
+#     fastSweepingConfig.h 
+     main.cpp)
+
+
+IF(  BUILD_CUDA ) 
+	CUDA_ADD_EXECUTABLE(fast-sweeping${debugExt} main.cu)
+ELSE(  BUILD_CUDA )                
+	ADD_EXECUTABLE(fast-sweeping${debugExt} main.cpp)
+ENDIF( BUILD_CUDA )
+target_link_libraries (fast-sweeping${debugExt} tnl${debugExt}-${tnlVersion} )
+
+
+INSTALL( TARGETS fast-sweeping${debugExt}
+         RUNTIME DESTINATION bin
+         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
+        
+#INSTALL( FILES ${tnl_fast_sweeping_SOURCES}
+#         DESTINATION share/tnl-${tnlVersion}/examples/fast-sweeping )
diff --git a/examples/narrow-band/MainBuildConfig.h b/examples/narrow-band/MainBuildConfig.h
new file mode 100644
index 0000000000..1b904c0c8b
--- /dev/null
+++ b/examples/narrow-band/MainBuildConfig.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+                          MainBuildConfig.h  -  description
+                             -------------------
+    begin                : Jul 7, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MAINBUILDCONFIG_H_
+#define MAINBUILDCONFIG_H_
+
+#include <solvers/tnlBuildConfigTags.h>
+
+class MainBuildConfig
+{
+   public:
+
+      static void print() { cerr << "MainBuildConfig" << endl; }
+};
+
+/****
+ * Turn off support for float and long double.
+ */
+template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
+template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
+
+/****
+ * Turn off support for short int and long int indexing.
+ */
+template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
+template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
+
+/****
+ * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
+ */
+template< int Dimensions, typename Real, typename Device, typename Index >
+   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
+      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
+                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
+                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
+                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
+
+/****
+ * Please, chose your preferred time discretisation  here.
+ */
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
+
+/****
+ * Only the Runge-Kutta-Merson solver is enabled by default.
+ */
+template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
+
+#endif /* MAINBUILDCONFIG_H_ */
diff --git a/examples/narrow-band/main.cpp b/examples/narrow-band/main.cpp
new file mode 100644
index 0000000000..8849008ff6
--- /dev/null
+++ b/examples/narrow-band/main.cpp
@@ -0,0 +1,17 @@
+/***************************************************************************
+                          main.cpp  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "main.h"
diff --git a/examples/narrow-band/main.cu b/examples/narrow-band/main.cu
new file mode 100644
index 0000000000..8849008ff6
--- /dev/null
+++ b/examples/narrow-band/main.cu
@@ -0,0 +1,17 @@
+/***************************************************************************
+                          main.cpp  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "main.h"
diff --git a/examples/narrow-band/main.h b/examples/narrow-band/main.h
new file mode 100644
index 0000000000..0aa9722469
--- /dev/null
+++ b/examples/narrow-band/main.h
@@ -0,0 +1,88 @@
+/***************************************************************************
+                          main.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+
+#include "MainBuildConfig.h"
+	//for HOST versions:
+//#include "tnlNarrowBand.h"
+	//for DEVICE versions:
+#include "tnlNarrowBand_CUDA.h"
+#include "narrowBandConfig.h"
+#include <solvers/tnlBuildConfigTags.h>
+
+#include <mesh/tnlGrid.h>
+#include <core/tnlDevice.h>
+#include <time.h>
+#include <ctime>
+
+typedef MainBuildConfig BuildConfig;
+
+int main( int argc, char* argv[] )
+{
+	time_t start;
+	time_t stop;
+	time(&start);
+	std::clock_t start2= std::clock();
+   tnlParameterContainer parameters;
+   tnlConfigDescription configDescription;
+   narrowBandConfig< BuildConfig >::configSetup( configDescription );
+
+   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
+      return false;
+
+   const int& dim = parameters.getParameter< int >( "dim" );
+
+   if(dim == 2)
+   {
+		tnlNarrowBand<tnlGrid<2,double,tnlHost, int>, double, int> solver;
+		if(!solver.init(parameters))
+	   {
+			cerr << "Solver failed to initialize." << endl;
+			return EXIT_FAILURE;
+	   }
+		checkCudaDevice;
+	   cout << "-------------------------------------------------------------" << endl;
+	   cout << "Starting solver..." << endl;
+	   solver.run();
+   }
+//   else if(dim == 3)
+//   {
+//		tnlNarrowBand<tnlGrid<3,double,tnlHost, int>, double, int> solver;
+//		if(!solver.init(parameters))
+//	   {
+//			cerr << "Solver failed to initialize." << endl;
+//			return EXIT_FAILURE;
+//	   }
+//		checkCudaDevice;
+//	   cout << "-------------------------------------------------------------" << endl;
+//	   cout << "Starting solver..." << endl;
+//	   solver.run();
+//   }
+   else
+   {
+	   cerr << "Unsupported number of dimensions: " << dim << "!" << endl;
+	   return EXIT_FAILURE;
+   }
+
+
+   time(&stop);
+   cout << "Solver stopped..." << endl;
+   cout << endl;
+   cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) << endl;
+   return EXIT_SUCCESS;
+}
+
+
diff --git a/examples/narrow-band/narrowBandConfig.h b/examples/narrow-band/narrowBandConfig.h
new file mode 100644
index 0000000000..9ccc1e8cb8
--- /dev/null
+++ b/examples/narrow-band/narrowBandConfig.h
@@ -0,0 +1,40 @@
+/***************************************************************************
+                          narrowBandConfig.h  -  description
+                             -------------------
+    begin                : Oct 15, 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+    email                :
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef NARROWBANDCONFIG_H_
+#define NARROWBANDCONFIG_H_
+
+#include <config/tnlConfigDescription.h>
+
+template< typename ConfigTag >
+class narrowBandConfig
+{
+   public:
+      static void configSetup( tnlConfigDescription& config )
+      {
+         config.addDelimiter( "Parallel Eikonal solver settings:" );
+         config.addEntry        < tnlString > ( "problem-name", "This defines particular problem.", "fast-sweeping" );
+         config.addRequiredEntry        < tnlString > ( "initial-condition", "Initial condition for solver");
+         config.addRequiredEntry        < int > ( "dim", "Dimension of problem.");
+         config.addRequiredEntry        < double > ( "tau", "Time step.");
+         config.addRequiredEntry        < double > ( "final-time", "Final time.");
+         config.addEntry       < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" );
+         config.addEntry       < tnlString > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" );
+      }
+};
+
+#endif /* NARROWBANDCONFIG_H_ */
diff --git a/examples/narrow-band/tnlNarrowBand.h b/examples/narrow-band/tnlNarrowBand.h
new file mode 100644
index 0000000000..b302f3ff91
--- /dev/null
+++ b/examples/narrow-band/tnlNarrowBand.h
@@ -0,0 +1,186 @@
+/***************************************************************************
+                          tnlNarrowBand.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLNARROWBAND_H_
+#define TNLNARROWBAND_H_
+
+#include <config/tnlParameterContainer.h>
+#include <core/vectors/tnlVector.h>
+#include <core/vectors/tnlStaticVector.h>
+#include <functions/tnlMeshFunction.h>
+#include <core/tnlHost.h>
+#include <mesh/tnlGrid.h>
+#include <mesh/grids/tnlGridEntity.h>
+#include <limits.h>
+#include <core/tnlDevice.h>
+#include <ctime>
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#endif
+
+
+
+
+template< typename Mesh,
+		  typename Real,
+		  typename Index >
+class tnlNarrowBand
+{};
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 2, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+
+	tnlNarrowBand();
+
+	static tnlString getType();
+	bool init( const tnlParameterContainer& parameters );
+
+	bool initGrid();
+	bool run();
+
+	//for single core version use this implementation:
+	void updateValue(const Index i, const Index j);
+	//for parallel version use this one instead:
+//	void updateValue(const Index i, const Index j, DofVectorType* grid);
+
+
+	void setupSquare1000(Index i, Index j);
+	void setupSquare1100(Index i, Index j);
+	void setupSquare1010(Index i, Index j);
+	void setupSquare1001(Index i, Index j);
+	void setupSquare1110(Index i, Index j);
+	void setupSquare1101(Index i, Index j);
+	void setupSquare1011(Index i, Index j);
+	void setupSquare1111(Index i, Index j);
+	void setupSquare0000(Index i, Index j);
+	void setupSquare0100(Index i, Index j);
+	void setupSquare0010(Index i, Index j);
+	void setupSquare0001(Index i, Index j);
+	void setupSquare0110(Index i, Index j);
+	void setupSquare0101(Index i, Index j);
+	void setupSquare0011(Index i, Index j);
+	void setupSquare0111(Index i, Index j);
+
+	Real fabsMin(const Real x, const Real y);
+
+
+protected:
+
+	MeshType Mesh;
+
+	bool exactInput;
+
+	tnlMeshFunction<MeshType> dofVector, dofVector2;
+	DofVectorType data;
+
+	RealType h;
+
+	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
+
+
+#ifdef HAVE_OPENMP
+//	omp_lock_t* gridLock;
+#endif
+
+
+};
+
+
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 3, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+	tnlNarrowBand();
+
+	static tnlString getType();
+	bool init( const tnlParameterContainer& parameters );
+
+	bool initGrid();
+	bool run();
+
+	//for single core version use this implementation:
+	void updateValue(const Index i, const Index j, const Index k);
+	//for parallel version use this one instead:
+//	void updateValue(const Index i, const Index j, DofVectorType* grid);
+
+	Real fabsMin(const Real x, const Real y);
+
+
+protected:
+
+	MeshType Mesh;
+
+	bool exactInput;
+
+
+	tnlMeshFunction<MeshType> dofVector, dofVector2;
+	DofVectorType data;
+
+	RealType h;
+
+	tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity;
+
+#ifdef HAVE_OPENMP
+//	omp_lock_t* gridLock;
+#endif
+
+
+};
+
+
+	//for single core version use this implementation:
+#include "tnlNarrowBand2D_impl.h"
+	//for parallel version use this one instead:
+// #include "tnlNarrowBand2D_openMP_impl.h"
+
+#include "tnlNarrowBand3D_impl.h"
+
+#endif /* TNLNARROWBAND_H_ */
diff --git a/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
new file mode 100644
index 0000000000..893e6d1c67
--- /dev/null
+++ b/examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
@@ -0,0 +1,1109 @@
+/***************************************************************************
+                          tnlNarrowBand2D_CUDA_v4_impl.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLNARROWBAND2D_IMPL_H_
+#define TNLNARROWBAND2D_IMPL_H_
+
+#define NARROWBAND_SUBGRID_SIZE 8
+
+#include "tnlNarrowBand.h"
+
+__device__
+double fabsMin( double x, double y)
+{
+	double fx = abs(x);
+
+	if(Min(fx,abs(y)) == fx)
+		return x;
+	else
+		return y;
+}
+
+__device__
+double atomicFabsMin(double* address, double val)
+{
+	unsigned long long int* address_as_ull =
+						  (unsigned long long int*)address;
+	unsigned long long int old = *address_as_ull, assumed;
+	do {
+		assumed = old;
+			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
+	} while (assumed != old);
+	return __longlong_as_double(old);
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlString tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+	   return tnlString( "tnlNarrowBand< " ) +
+	          MeshType::getType() + ", " +
+	          ::getType< Real >() + ", " +
+	          ::getType< Index >() + " >";
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
+:dofVector(Mesh)
+{
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+	const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+
+	if( ! Mesh.load( meshFile ) )
+	{
+		   cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	if( ! dofVector.load( initialCondition ) )
+	{
+		   cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
+	//Entity.refresh();
+	counter = 0;
+
+	const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );
+
+	if(exact_input == "no")
+		exactInput=false;
+	else
+		exactInput=true;
+
+	tau = parameters.getParameter< double >( "tau" );
+
+	finalTime = parameters.getParameter< double >( "final-time" );
+
+	statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);
+#ifdef HAVE_CUDA
+
+	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
+	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
+	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+	cudaMalloc(&(cudaStatusVector),  statusGridSize*statusGridSize* sizeof(int));
+//	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(),  statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice);
+
+
+	cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
+	cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
+
+#endif
+
+	int n = Mesh.getDimensions().x();
+
+	dim3 threadsPerBlock2(1, 1);
+	dim3 numBlocks2(((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE) ,((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE));
+	initStatusGridCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+
+	dim3 threadsPerBlock(16, 16);
+	dim3 numBlocks(n/16 + 1 ,n/16 +1);
+	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+
+
+	return true;
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
+{
+
+	int n = Mesh.getDimensions().x();
+	dim3 threadsPerBlock(1, 512);
+	dim3 numBlocks(4,1);
+
+	double time = 0.0;
+
+	while(time < finalTime)
+	{
+		if(tau+time > finalTime)
+			tau=finalTime-time;
+
+		runNarrowBandCUDA(this->cudaSolver,tau);
+
+		time += tau;
+	}
+
+	runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0);
+
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+	//data.setLike(dofVector.getData());
+	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+	cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+	cudaDeviceSynchronize();
+	cudaFree(cudaDofVector);
+	cudaFree(cudaDofVector2);
+	cudaFree(cudaSolver);
+	//data.save("u-00001.tnl");
+	dofVector.save("u-00001.tnl");
+	cudaDeviceSynchronize();
+	return true;
+}
+
+
+
+
+#ifdef HAVE_CUDA
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
+{
+	if(cudaStatusVector[i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)] != 0)
+	{
+		tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+		Entity.setCoordinates(CoordinatesType(i,j));
+		Entity.refresh();
+		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		Real value = cudaDofVector2[Entity.getIndex()];
+		Real a,b, tmp;
+
+		if( i == 0 )
+			a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		else if( i == Mesh.getDimensions().x() - 1 )
+			a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		else
+		{
+			a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
+					 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+		}
+
+		if( j == 0 )
+			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		else if( j == Mesh.getDimensions().y() - 1 )
+			b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+		else
+		{
+			b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
+					 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+		}
+
+
+		if(abs(a-b) >= h)
+			tmp = fabsMin(a,b) + Sign(value)*h;
+		else
+			tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
+
+	//	cudaDofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
+		atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp);
+	}
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
+{
+	int i = threadIdx.x + blockDim.x*blockIdx.x;
+	int j = blockDim.y*blockIdx.y + threadIdx.y;
+
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+
+	int gid = Entity.getIndex();
+
+	cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]);
+//
+//	if(abs(cudaDofVector[gid]) < 1.01*h)
+//		cudaDofVector2[gid] = cudaDofVector[gid];
+
+
+
+
+
+	if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() )
+	{
+		if(cudaDofVector[Entity.getIndex()] > 0)
+		{
+			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1111(i,j);
+					else
+						setupSquare1110(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1101(i,j);
+					else
+						setupSquare1100(i,j);
+				}
+			}
+			else
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1011(i,j);
+					else
+						setupSquare1010(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare1001(i,j);
+					else
+						setupSquare1000(i,j);
+				}
+			}
+		}
+		else
+		{
+			if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0111(i,j);
+					else
+						setupSquare0110(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0101(i,j);
+					else
+						setupSquare0100(i,j);
+				}
+			}
+			else
+			{
+				if(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0011(i,j);
+					else
+						setupSquare0010(i,j);
+				}
+				else
+				{
+					if(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+						setupSquare0001(i,j);
+					else
+						setupSquare0000(i,j);
+				}
+			}
+		}
+
+	}
+
+	return true;
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
+{
+	Real fx = abs(x);
+	//Real fy = abs(y);
+
+	//Real tmpMin = Min(fx,abs(y));
+
+	if(Min(fx,abs(y)) == fx)
+		return x;
+	else
+		return y;
+
+
+}
+
+
+
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+{
+
+
+	int gx = 0;
+	int gy = threadIdx.y;
+	//if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
+	//	return;
+	int n = solver->Mesh.getDimensions().x();
+	int blockCount = n/blockDim.y +1;
+	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
+	//int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
+
+	//int id1 = gx+gy;
+	//int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
+
+	if(blockIdx.x==0)
+	{
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+	else if(blockIdx.x==1)
+	{
+		gx=n-1;
+		gy=threadIdx.y;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+	else if(blockIdx.x==2)
+	{
+		gx=0;
+		gy=n-threadIdx.y-1;
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+	else if(blockIdx.x==3)
+	{
+		gx=n-1;
+		gy=n-threadIdx.y-1;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+	}
+
+
+
+
+
+}
+
+
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+{
+
+
+	int gx = threadIdx.x + blockDim.x*blockIdx.x;
+	int gy = blockDim.y*blockIdx.y + threadIdx.y;
+
+
+	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
+	{
+		solver->initGrid();
+	}
+
+
+}
+
+
+
+
+
+__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver)
+{
+	cudaStatusVector[blockIdx.x + blockDim.x*blockIdx.y] = 0;
+}
+
+__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, double tau)
+{
+	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
+	int i = threadIdx.x + blockIdx.x*blockDim.x;
+	int j = threadIdx.y + blockIdx.y*blockDim.y;
+
+
+	if(cudaStatusVector[i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)] != 0)
+	{
+		tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
+		Entity.setCoordinates(CoordinatesType(i,j));
+		Entity.refresh();
+		tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+		Real value = cudaDofVector2[Entity.getIndex()];
+		Real xf,xb,yf,yb, tmp, fu;
+
+		if( i == 0 )
+			yb = yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		else if( i == Mesh.getDimensions().x() - 1 )
+			yb = yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+		else
+		{
+			yb =  solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+			yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+		}
+
+		if( j == 0 )
+			xb = xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		else if( j == Mesh.getDimensions().y() - 1 )
+			xb = xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
+		else
+		{
+			xb =  solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()];
+			xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+		}
+
+
+		tmp = sqrt(0.5 * (xf*xf + xb*xb    +   yf*yf + yb*yb ) );
+
+		fu = 1.0 * tmp;
+
+		if((tau*fu+u)*u <=0 )
+		{
+//			if(blockIdx.x < gridDim.x -1)
+//			{
+//				atomicMax( &cudaStatusVector[(i+2)/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//				if(blockIdx.y < gridDim.y -1)
+//					atomicMax( &cudaStatusVector[(i+2)/NARROWBAND_SUBGRID_SIZE + (j+2/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//				if(blockIdx.y > 0)
+//					atomicMax( &cudaStatusVector[(i+2)/NARROWBAND_SUBGRID_SIZE + (j-2/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//
+//			}
+//			if(blockIdx.x > 0)
+//			{
+//				atomicMax( &cudaStatusVector[(i-2)/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//				if(blockIdx.y < gridDim.y -1)
+//					atomicMax( &cudaStatusVector[(i-2)/NARROWBAND_SUBGRID_SIZE + (j+2/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//				if(blockIdx.y > 0)
+//					atomicMax( &cudaStatusVector[(i-2)/NARROWBAND_SUBGRID_SIZE + (j-2/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//
+//			}
+//			if(blockIdx.y < gridDim.y -1)
+//				atomicMax( &cudaStatusVector[i/NARROWBAND_SUBGRID_SIZE + ((j+2)/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+//			if(blockIdx.y > 0)
+//				atomicMax( &cudaStatusVector[i/NARROWBAND_SUBGRID_SIZE + ((j-2)/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE)]  ,1);
+		}
+
+
+		cudaDofVector2[Entity.getIndex()]  = u+tau*fu;
+	}
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[Entity.getIndex()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	Real al,be, a,b,c,s;
+	al=abs(cudaDofVector[Entity.getIndex()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 cudaDofVector[Entity.getIndex()]));
+
+	be=abs(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
+{
+	tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+}
+#endif
+
+
+
+
+#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/examples/narrow-band/tnlNarrowBand2D_impl.h b/examples/narrow-band/tnlNarrowBand2D_impl.h
new file mode 100644
index 0000000000..a1d255c1af
--- /dev/null
+++ b/examples/narrow-band/tnlNarrowBand2D_impl.h
@@ -0,0 +1,927 @@
+/***************************************************************************
+                          tnlNarrowBand2D_impl.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLNARROWBAND2D_IMPL_H_
+#define TNLNARROWBAND2D_IMPL_H_
+
+#include "tnlNarrowBand.h"
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlString tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+	   return tnlString( "tnlNarrowBand< " ) +
+	          MeshType::getType() + ", " +
+	          ::getType< Real >() + ", " +
+	          ::getType< Index >() + " >";
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
+:Entity(Mesh),
+ dofVector(Mesh),
+ dofVector2(Mesh)
+{
+}
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+	const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+
+	if( ! Mesh.load( meshFile ) )
+	{
+		   cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	if( ! dofVector.load( initialCondition ) )
+	{
+		   cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
+		   return false;
+	}
+	dofVector2.load(initialCondition);
+
+	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
+	Entity.refresh();
+
+	const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );
+
+	if(exact_input == "no")
+		exactInput=false;
+	else
+		exactInput=true;
+
+	cout << "a" << endl;
+	return initGrid();
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
+{
+
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
+	{
+		dofVector2[i]=INT_MAX*Sign(dofVector[i]);
+	}
+
+	for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++)
+	{
+		for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++)
+			{
+			this->Entity.setCoordinates(CoordinatesType(i,j));
+			this->Entity.refresh();
+			neighbourEntities.refresh(Mesh,Entity.getIndex());
+
+				if(dofVector[this->Entity.getIndex()] > 0)
+				{
+					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1111(i,j);
+							else
+								setupSquare1110(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1101(i,j);
+							else
+								setupSquare1100(i,j);
+						}
+					}
+					else
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1011(i,j);
+							else
+								setupSquare1010(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare1001(i,j);
+							else
+								setupSquare1000(i,j);
+						}
+					}
+				}
+				else
+				{
+					if(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()] > 0)
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0111(i,j);
+							else
+								setupSquare0110(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0101(i,j);
+							else
+								setupSquare0100(i,j);
+						}
+					}
+					else
+					{
+						if(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()] > 0)
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0011(i,j);
+							else
+								setupSquare0010(i,j);
+						}
+						else
+						{
+							if(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()] > 0)
+								setupSquare0001(i,j);
+							else
+								setupSquare0000(i,j);
+						}
+					}
+				}
+
+			}
+	}
+	cout << "a" << endl;
+
+//	Real tmp = 0.0;
+//	Real ax=0.5/sqrt(2.0);
+//
+//	if(!exactInput)
+//	{
+//		for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++)
+//				dofVector[i]=0.5*h*Sign(dofVector[i]);
+//	}
+//
+//
+//	for(Index i = 1; i < Mesh.getDimensions().x()-1; i++)
+//	{
+//		for(Index j = 1; j < Mesh.getDimensions().y()-1; j++)
+//		{
+//			 tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//			if(tmp == 0.0)
+//			{}
+//			else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//					dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
+//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//			{}
+//			else
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//		}
+//	}
+//
+//
+//
+//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
+//	{
+//		Index j = 0;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
+//	{
+//		Index j = Mesh.getDimensions().y() - 1;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
+//	{
+//		Index i = 0;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
+//	{
+//		Index i = Mesh.getDimensions().x() - 1;
+//		tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//
+//
+//		if(tmp == 0.0)
+//		{}
+//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
+//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
+//		{}
+//		else
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//	}
+//
+//
+//	Index i = Mesh.getDimensions().x() - 1;
+//	Index j = Mesh.getDimensions().y() - 1;
+//
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//
+//
+//
+//	j = 0;
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//
+//
+//
+//	i = 0;
+//	j = Mesh.getDimensions().y() -1;
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+//
+//
+//
+//	j = 0;
+//	tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
+//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
+//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
+//
+//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
+
+	//data.setLike(dofVector2.getData());
+	//data=dofVector2.getData();
+	//cout << data.getType() << endl;
+	dofVector2.save("u-00000.tnl");
+	//dofVector2.getData().save("u-00000.tnl");
+
+	return true;
+}
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
+{
+
+	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+	{
+		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+	{
+		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+	{
+		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+	{
+		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+		{
+			updateValue(i,j);
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+
+//	data.setLike(dofVector2.getData());
+//	data = dofVector2.getData();
+//	cout << data.getType() << endl;
+	dofVector2.save("u-00001.tnl");
+	//dofVector2.getData().save("u-00001.tnl");
+
+	return true;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
+{
+
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+
+	Real value = dofVector2[Entity.getIndex()];
+	Real a,b, tmp;
+
+	if( i == 0 )
+		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()];
+	else if( i == Mesh.getDimensions().x() - 1 )
+		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()];
+	else
+	{
+		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0 >()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()] );
+	}
+
+	if( j == 0 )
+		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()];
+	else if( j == Mesh.getDimensions().y() - 1 )
+		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+	else
+	{
+		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1 >()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()] );
+	}
+
+
+	if(fabs(a-b) >= h)
+		tmp = fabsMin(a,b) + Sign(value)*h;
+	else
+		tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
+
+
+	dofVector2[Entity.getIndex()] = fabsMin(value, tmp);
+
+//	if(dofVector2[Entity.getIndex()] > 1.0)
+//		cout << value << "    " << tmp << " " << dofVector2[Entity.getIndex()] << endl;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
+{
+	Real fx = fabs(x);
+	Real fy = fabs(y);
+
+	Real tmpMin = Min(fx,fy);
+
+	if(tmpMin == fx)
+		return x;
+	else
+		return y;
+
+}
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
+{
+//	this->Entity.setCoordinates(CoordinatesType(i,j));
+//	this->Entity.refresh();
+//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
+{
+//	this->Entity.setCoordinates(CoordinatesType(i,j));
+//	this->Entity.refresh();
+//	auto neighbourEntities =  Entity.getNeighbourEntities();
+//	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+//	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[Entity.getIndex()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	a = be/al;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-al;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	Real al,be, a,b,c,s;
+	al=abs(dofVector[Entity.getIndex()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()]-
+			 dofVector[Entity.getIndex()]));
+
+	be=abs(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]/
+			(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()]-
+			 dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()]));
+
+	a = al-be;
+	b=1.0;
+	c=-be;
+	s= h/sqrt(a*a+b*b);
+
+
+	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j));
+	this->Entity.refresh();
+	auto neighbourEntities =  Entity.getNeighbourEntities();
+	dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]);
+	dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 0,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  1 >()]);
+	dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighbourEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighbourEntities.template getEntityIndex< 1,  0 >()]);
+}
+
+
+
+
+#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
new file mode 100644
index 0000000000..bebb1b1a2b
--- /dev/null
+++ b/examples/narrow-band/tnlNarrowBand3D_CUDA_impl.h
@@ -0,0 +1,961 @@
+/***************************************************************************
+                          tnlNarrowBand2D_CUDA_v4_impl.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLNARROWBAND3D_IMPL_H_
+#define TNLNARROWBAND3D_IMPL_H_
+
+#include "tnlNarrowBand.h"
+
+//__device__
+//double fabsMin( double x, double y)
+//{
+//	double fx = abs(x);
+//
+//	if(Min(fx,abs(y)) == fx)
+//		return x;
+//	else
+//		return y;
+//}
+//
+//__device__
+//double atomicFabsMin(double* address, double val)
+//{
+//	unsigned long long int* address_as_ull =
+//						  (unsigned long long int*)address;
+//	unsigned long long int old = *address_as_ull, assumed;
+//	do {
+//		assumed = old;
+//			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) ));
+//	} while (assumed != old);
+//	return __longlong_as_double(old);
+//}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlString tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+	   return tnlString( "tnlNarrowBand< " ) +
+	          MeshType::getType() + ", " +
+	          ::getType< Real >() + ", " +
+	          ::getType< Index >() + " >";
+}
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+	const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+
+	if( ! Mesh.load( meshFile ) )
+	{
+		   cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	if( ! dofVector.load( initialCondition ) )
+	{
+		   cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+	this->h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >();
+	counter = 0;
+
+	const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );
+
+	if(exact_input == "no")
+		exactInput=false;
+	else
+		exactInput=true;
+
+
+#ifdef HAVE_CUDA
+
+	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
+	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
+	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
+
+
+	cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >));
+	cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
+
+#endif
+
+	int n = Mesh.getDimensions().x();
+	dim3 threadsPerBlock(8, 8,8);
+	dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1);
+
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+	return true;
+}
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run()
+{
+
+	int n = Mesh.getDimensions().x();
+	dim3 threadsPerBlock(1, 512);
+	dim3 numBlocks(8,1);
+
+
+	runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0);
+
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+
+	cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+	cudaDeviceSynchronize();
+	cudaFree(cudaDofVector);
+	cudaFree(cudaDofVector2);
+	cudaFree(cudaSolver);
+	dofVector.save("u-00001.tnl");
+	cudaDeviceSynchronize();
+	return true;
+}
+
+
+
+
+#ifdef HAVE_CUDA
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
+{
+	tnlGridEntity< tnlGrid< 3,double, tnlHost, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j,k));
+	Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+	Real value = cudaDofVector2[Entity.getIndex()];
+	Real a,b,c, tmp;
+
+	if( i == 0 )
+		a = cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0 >()];
+	else if( i == Mesh.getDimensions().x() - 1 )
+		a = cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+	else
+	{
+		a = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()],
+				 cudaDofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0 >()] );
+	}
+
+	if( j == 0 )
+		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0 >()];
+	else if( j == Mesh.getDimensions().y() - 1 )
+		b = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()];
+	else
+	{
+		b = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0 >()],
+				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0 >()] );
+	}
+
+	if( k == 0 )
+		c = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1 >()];
+	else if( k == Mesh.getDimensions().z() - 1 )
+		c = cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()];
+	else
+	{
+		c = fabsMin( cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1 >()],
+				 cudaDofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1 >()] );
+	}
+
+	Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c);
+
+	if(hD < 0.0)
+		tmp = fabsMin(a,fabsMin(b,c)) + Sign(value)*h;
+	else
+		tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(hD) );
+
+	atomicFabsMin(&cudaDofVector2[Entity.getIndex()],tmp);
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k)
+{
+	tnlGridEntity< tnlGrid< 3,double, tnlHost, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
+	Entity.setCoordinates(CoordinatesType(i,j,k));
+	Entity.refresh();
+	int gid = Entity.getIndex();
+
+	if(abs(cudaDofVector[gid]) < 1.8*h)
+		cudaDofVector2[gid] = cudaDofVector[gid];
+	else
+		cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]);
+
+	return true;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+__device__
+Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
+{
+	Real fx = abs(x);
+	if(Min(fx,abs(y)) == fx)
+		return x;
+	else
+		return y;
+
+
+}
+
+
+
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i)
+{
+
+	int gx = 0;
+	int gy = threadIdx.y;
+
+	int n = solver->Mesh.getDimensions().x();
+	int blockCount = n/blockDim.y +1;
+
+	if(blockIdx.x==0)
+	{
+		for(int gz = 0; gz < n;gz++)
+		{
+		gx = 0;
+		gy = threadIdx.y;
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		__syncthreads();
+		}
+	}
+	else if(blockIdx.x==1)
+	{
+		for(int gz = 0; gz < n;gz++)
+		{
+		gx=n-1;
+		gy=threadIdx.y;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+	else if(blockIdx.x==2)
+	{
+
+		for(int gz = 0; gz < n;gz++)
+		{
+		gx=0;
+		gy=n-threadIdx.y-1;
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+	else if(blockIdx.x==3)
+	{
+		for(int gz = 0; gz < n;gz++)
+		{
+		gx=n-1;
+		gy=n-threadIdx.y-1;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+
+
+
+
+	else if(blockIdx.x==4)
+	{
+		for(int gz = n-1; gz > -1;gz--)
+		{
+		gx = 0;
+		gy = threadIdx.y;
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+	else if(blockIdx.x==5)
+	{
+		for(int gz = n-1; gz > -1;gz--)
+		{
+		gx=n-1;
+		gy=threadIdx.y;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy < n)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy+=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+	else if(blockIdx.x==6)
+	{
+
+		for(int gz = n-1; gz > -1;gz--)
+		{
+		gx=0;
+		gy=n-threadIdx.y-1;
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx++;
+				if(gx==n)
+				{
+					gx=0;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+	else if(blockIdx.x==7)
+	{
+		for(int gz = n-1; gz > -1;gz--)
+		{
+		gx=n-1;
+		gy=n-threadIdx.y-1;
+
+		for(int k = 0; k < n*blockCount + blockDim.y; k++)
+		{
+			if(threadIdx.y  < k+1 && gy > -1)
+			{
+				solver->updateValue(gx,gy,gz);
+				gx--;
+				if(gx==-1)
+				{
+					gx=n-1;
+					gy-=blockDim.y;
+				}
+			}
+
+
+			__syncthreads();
+		}
+		}
+	}
+
+
+
+
+}
+
+
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver)
+{
+	int gx = threadIdx.x + blockDim.x*blockIdx.x;
+	int gy = blockDim.y*blockIdx.y + threadIdx.y;
+	int gz = blockDim.z*blockIdx.z + threadIdx.z;
+
+	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz)
+	{
+		solver->initGrid(gx,gy,gz);
+	}
+
+
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	a = be/al;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//
+//
+//
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
+//
+//	a = al-be;
+//	b=1.0;
+//	c=-al;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
+//
+//	a = al-be;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//
+//
+//
+//
+//
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
+//
+//	a = al-be;
+//	b=1.0;
+//	c=-al;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	Real al,be, a,b,c,s;
+//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
+//
+//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
+//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
+//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
+//
+//	a = al-be;
+//	b=1.0;
+//	c=-be;
+//	s= h/sqrt(a*a+b*b);
+//
+//
+//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//
+//}
+//
+//template< typename MeshReal,
+//          typename Device,
+//          typename MeshIndex,
+//          typename Real,
+//          typename Index >
+//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
+//{
+//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
+//	cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
+//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
+//}
+#endif
+
+
+
+
+#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/examples/narrow-band/tnlNarrowBand3D_impl.h b/examples/narrow-band/tnlNarrowBand3D_impl.h
new file mode 100644
index 0000000000..fb69280dad
--- /dev/null
+++ b/examples/narrow-band/tnlNarrowBand3D_impl.h
@@ -0,0 +1,307 @@
+/***************************************************************************
+                          tnlNarrowBand2D_impl.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLNARROWBAND3D_IMPL_H_
+#define TNLNARROWBAND3D_IMPL_H_
+
+#include "tnlNarrowBand.h"
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlString tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+	   return tnlString( "tnlNarrowBand< " ) +
+	          MeshType::getType() + ", " +
+	          ::getType< Real >() + ", " +
+	          ::getType< Index >() + " >";
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
+:Entity(Mesh),
+ dofVector(Mesh),
+ dofVector2(Mesh)
+{
+}
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+	const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+
+	if( ! Mesh.load( meshFile ) )
+	{
+		   cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	}
+
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	if( ! dofVector.load( initialCondition ) )
+	{
+		   cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl;
+		   return false;
+	}
+	dofVector2.load(initialCondition);
+
+	h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >();
+	Entity.refresh();
+
+	const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" );
+
+	if(exact_input == "no")
+		exactInput=false;
+	else
+		exactInput=true;
+//	cout << "bla "<<endl;
+	return initGrid();
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
+{
+	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++)
+	{
+
+		if (abs(dofVector[i]) < 1.8*h)
+			dofVector2[i]=dofVector[i];
+		else
+			dofVector2[i]=INT_MAX*Sign(dofVector[i]);
+	}
+
+	return true;
+}
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run()
+{
+
+	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
+	{
+		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+		{
+			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
+	{
+		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+		{
+			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
+	{
+		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+		{
+			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
+	{
+		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+		{
+			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+
+
+
+
+
+
+
+	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
+	{
+		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+		{
+			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
+	{
+		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+		{
+			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
+	{
+		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
+		{
+			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
+	{
+		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
+		{
+			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
+			{
+				updateValue(i,j,k);
+			}
+		}
+	}
+
+/*---------------------------------------------------------------------------------------------------------------------------*/
+
+
+	dofVector2.save("u-00001.tnl");
+
+	cout << "bla 3"<<endl;
+	return true;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
+{
+	this->Entity.setCoordinates(CoordinatesType(i,j,k));
+	this->Entity.refresh();
+	tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities(Entity);
+	Real value = dofVector2[Entity.getIndex()];
+	Real a,b,c, tmp;
+
+	if( i == 0 )
+		a = dofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0>()];
+	else if( i == Mesh.getDimensions().x() - 1 )
+		a = dofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0 >()];
+	else
+	{
+		a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1,  0,  0>()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 1,  0,  0>()] );
+	}
+
+	if( j == 0 )
+		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0>()];
+	else if( j == Mesh.getDimensions().y() - 1 )
+		b = dofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0>()];
+	else
+	{
+		b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  -1,  0>()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 0,  1,  0>()] );
+	}
+
+	if( k == 0 )
+		c = dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1>()];
+	else if( k == Mesh.getDimensions().z() - 1 )
+		c = dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1>()];
+	else
+	{
+		c = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  -1>()],
+				 dofVector2[neighbourEntities.template getEntityIndex< 0,  0,  1>()] );
+	}
+
+	Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c);
+
+	if(hD < 0.0)
+		tmp = fabsMin(a,fabsMin(b,c)) + Sign(value)*h;
+	else
+		tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(hD) );
+
+
+	dofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
+{
+	Real fx = fabs(x);
+	Real fy = fabs(y);
+
+	Real tmpMin = Min(fx,fy);
+
+	if(tmpMin == fx)
+		return x;
+	else
+		return y;
+
+}
+
+
+
+#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/examples/narrow-band/tnlNarrowBand_CUDA.h b/examples/narrow-band/tnlNarrowBand_CUDA.h
new file mode 100644
index 0000000000..c77f506e54
--- /dev/null
+++ b/examples/narrow-band/tnlNarrowBand_CUDA.h
@@ -0,0 +1,192 @@
+/***************************************************************************
+                          tnlNarrowBand_CUDA.h  -  description
+                             -------------------
+    begin                : Oct 15 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+#ifndef TNLNARROWBAND_H_
+#define TNLNARROWBAND_H_
+
+#include <config/tnlParameterContainer.h>
+#include <core/vectors/tnlVector.h>
+#include <core/vectors/tnlStaticVector.h>
+#include <core/tnlHost.h>
+#include <mesh/tnlGrid.h>
+#include <mesh/grids/tnlGridEntity.h>
+
+#include <functions/tnlMeshFunction.h>
+#include <limits.h>
+#include <core/tnlDevice.h>
+#include <ctime>
+
+
+
+
+
+template< typename Mesh,
+		  typename Real,
+		  typename Index >
+class tnlNarrowBand
+{};
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 2, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+	tnlNarrowBand();
+
+	__host__ static tnlString getType();
+	__host__ bool init( const tnlParameterContainer& parameters );
+	__host__ bool run();
+
+#ifdef HAVE_CUDA
+	__device__ bool initGrid();
+	__device__ void updateValue(const Index i, const Index j);
+	__device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3);
+	__device__ Real fabsMin(const Real x, const Real y);
+
+	tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
+	double* cudaDofVector;
+	double* cudaDofVector2;
+	int* cudaStatusVector;
+	int counter;
+	__device__ void setupSquare1000(Index i, Index j);
+	__device__ void setupSquare1100(Index i, Index j);
+	__device__ void setupSquare1010(Index i, Index j);
+	__device__ void setupSquare1001(Index i, Index j);
+	__device__ void setupSquare1110(Index i, Index j);
+	__device__ void setupSquare1101(Index i, Index j);
+	__device__ void setupSquare1011(Index i, Index j);
+	__device__ void setupSquare1111(Index i, Index j);
+	__device__ void setupSquare0000(Index i, Index j);
+	__device__ void setupSquare0100(Index i, Index j);
+	__device__ void setupSquare0010(Index i, Index j);
+	__device__ void setupSquare0001(Index i, Index j);
+	__device__ void setupSquare0110(Index i, Index j);
+	__device__ void setupSquare0101(Index i, Index j);
+	__device__ void setupSquare0011(Index i, Index j);
+	__device__ void setupSquare0111(Index i, Index j);
+#endif
+
+	MeshType Mesh;
+
+protected:
+
+	int statusGridSize;
+	bool exactInput;
+
+	tnlMeshFunction<MeshType> dofVector;
+	DofVectorType data;
+
+
+	RealType h, tau, finalTime;
+
+
+};
+
+
+
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 3, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+
+
+	__host__ static tnlString getType();
+	__host__ bool init( const tnlParameterContainer& parameters );
+	__host__ bool run();
+
+#ifdef HAVE_CUDA
+	__device__ bool initGrid(int i, int j, int k);
+	__device__ void updateValue(const Index i, const Index j, const Index k);
+	__device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3);
+	__device__ Real fabsMin(const Real x, const Real y);
+
+	tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
+	double* cudaDofVector;
+	double* cudaDofVector2;
+	int counter;
+#endif
+
+	MeshType Mesh;
+
+protected:
+
+
+
+	bool exactInput;
+
+	tnlMeshFunction<MeshType> dofVector;
+	DofVectorType data;
+
+	RealType h;
+
+
+};
+
+
+
+
+
+
+
+#ifdef HAVE_CUDA
+//template<int sweep_t>
+__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i);
+//__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver, int sweep, int i);
+
+__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
+
+__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver);
+__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, double tau);
+//__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, tnlHost, int >, double, int >* solver);
+#endif
+
+
+
+#include "tnlNarrowBand2D_CUDA_v4_impl.h"
+//											#include "tnlNarrowBand3D_CUDA_impl.h"
+
+#endif /* TNLNARROWBAND_H_ */
-- 
GitLab