diff --git a/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt
deleted file mode 100644
index 3f9db0da04526a8d5fc935f4079cc0bca91b3e56..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-# Build script for the legacy fast-sweeping-map solver executable.
-
-# Source list; it is only referenced by the disabled install( FILES ... ) rule below.
-set( tnl_fast_sweeping_map_SOURCES
-#     MainBuildConfig.h
-#     tnlFastSweepingMap2D_impl.h
-#     tnlFastSweepingMap.h
-#     fastSweepingMapConfig.h
-     main.cpp )
-
-# main.cu and main.cpp both just include main.h; BUILD_CUDA selects which one is compiled.
-if( BUILD_CUDA )
-   cuda_add_executable( fast-sweeping-map main.cu )
-else()
-   add_executable( fast-sweeping-map main.cpp )
-endif()
-target_link_libraries( fast-sweeping-map tnl )
-
-install( TARGETS fast-sweeping-map
-         RUNTIME DESTINATION bin
-         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
-                     GROUP_READ GROUP_EXECUTE
-                     WORLD_READ WORLD_EXECUTE )
-
-#install( FILES ${tnl_fast_sweeping_map_SOURCES}
-#         DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/fast-sweeping-map )
diff --git a/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h
deleted file mode 100644
index ed3d686eb99379af1589d734eac9b5812cccdedf..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/***************************************************************************
-                          MainBuildConfig.h  -  description
-                             -------------------
-    begin                : Jul 7, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef MAINBUILDCONFIG_H_
-#define MAINBUILDCONFIG_H_
-
-#include <solvers/tnlBuildConfigTags.h>
-
-/**
- * Build-configuration tag type of the fast-sweeping-map solver.
- * The tnlConfigTag* specializations in this header are keyed on this type.
- */
-class MainBuildConfig
-{
-   public:
-
-      /** Writes the name of this configuration to standard error. */
-      static void print()
-      {
-         std::cerr << "MainBuildConfig" << std::endl;
-      }
-};
-
-/****
- * Turn off support for float and long double.
- */
-template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
-template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
-
-/****
- * Turn off support for short int and long int indexing.
- */
-template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
-template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
-
-/****
- * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
- */
-template< int Dimensions, typename Real, typename Device, typename Index >
-   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
-      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
-                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
-                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
-                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
-
-/****
- * Please, chose your preferred time discretisation  here.
- */
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
-
-/****
- * Only the Runge-Kutta-Merson solver is enabled by default.
- */
-template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
-
-#endif /* MAINBUILDCONFIG_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h b/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h
deleted file mode 100644
index 9251deca876e821dace59682ca1a151555095c69..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/***************************************************************************
-                          fastSweepingMapConfig.h  -  description
-                             -------------------
-    begin                : Oct 15, 2015
-    copyright            : (C) 2015 by Tomas Sobotik
-    email                :
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef FASTSWEEPINGCONFIG_H_
-#define FASTSWEEPINGCONFIG_H_
-
-#include <config/tnlConfigDescription.h>
-
-/**
- * Declares the configuration entries accepted by the fast-sweeping-map solver.
- *
- * The ConfigTag parameter is not used by anything inside this class.
- */
-template< typename ConfigTag >
-class fastSweepingMapConfig
-{
-   public:
-
-      /**
-       * Registers the solver's entries on the given description.
-       *
-       * Required entries: "initial-condition", "dim", "map".
-       * Entries with defaults: "problem-name" ("fast-sweeping"),
-       * "mesh" ("mesh.tnl"), "exact-input" ("no").
-       *
-       * @param config description object that receives the entries
-       */
-      static void configSetup( tnlConfigDescription& config )
-      {
-         config.addDelimiter( "Parallel Eikonal solver settings:" );
-         config.addEntry< String >( "problem-name",
-                                    "This defines particular problem.",
-                                    "fast-sweeping" );
-         config.addRequiredEntry< String >( "initial-condition",
-                                            "Initial condition for solver");
-         config.addRequiredEntry< int >( "dim",
-                                         "Dimension of problem.");
-         config.addEntry< String >( "mesh",
-                                    "Name of mesh.",
-                                    "mesh.tnl" );
-         config.addEntry< String >( "exact-input",
-                                    "Are the function values near the curve equal to the SDF? (yes/no)",
-                                    "no" );
-         config.addRequiredEntry< String >( "map",
-                                            "Gradient map for solver");
-      }
-};
-
-#endif /* FASTSWEEPINGCONFIG_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping-map/main.cpp b/src/TNL/Legacy/fast-sweeping-map/main.cpp
deleted file mode 100644
index 8849008ff630db0400a6d7d98e789099e5fbb5d9..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/main.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/fast-sweeping-map/main.cu b/src/TNL/Legacy/fast-sweeping-map/main.cu
deleted file mode 100644
index 8849008ff630db0400a6d7d98e789099e5fbb5d9..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/main.cu
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cu  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/fast-sweeping-map/main.h b/src/TNL/Legacy/fast-sweeping-map/main.h
deleted file mode 100644
index 6f23851c2ea111712b9d65cfdbb613b04c1e1cdd..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/main.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          main.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-
-#include "MainBuildConfig.h"
-	//for HOST versions:
-#include "tnlFastSweepingMap.h"
-	//for DEVICE versions:
-//#include "tnlFastSweepingMap_CUDA.h"
-#include "fastSweepingMapConfig.h"
-#include <solvers/tnlBuildConfigTags.h>
-
-#include <mesh/tnlGrid.h>
-#include <core/tnlDevice.h>
-#include <time.h>
-#include <ctime>
-
-typedef MainBuildConfig BuildConfig;
-
-/**
- * Entry point of the fast-sweeping-map solver.
- *
- * Parses the command line against the entries declared by
- * fastSweepingMapConfig, builds the 2D solver, initializes and runs it,
- * and finally reports the elapsed wall-clock and CPU time.
- *
- * @param argc number of command-line arguments
- * @param argv command-line arguments
- * @return EXIT_SUCCESS after a completed run; EXIT_FAILURE when the command
- *         line cannot be parsed, the solver fails to initialize, or "dim"
- *         is not 2.
- */
-int main( int argc, char* argv[] )
-{
-   // Wall-clock (time) and CPU-time (clock) stamps for the final report.
-   time_t start;
-   time_t stop;
-   time( &start );
-   std::clock_t start2 = std::clock();
-
-   Config::ParameterContainer parameters;
-   tnlConfigDescription configDescription;
-   fastSweepingMapConfig< BuildConfig >::configSetup( configDescription );
-
-   // Must be a failure status: `return false` converts to 0, which the
-   // calling shell would interpret as success.
-   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
-      return EXIT_FAILURE;
-
-   const int& dim = parameters.getParameter< int >( "dim" );
-
-   if( dim == 2 )
-   {
-      tnlFastSweepingMap< tnlGrid< 2, double, TNL::Devices::Host, int >, double, int > solver;
-      if( ! solver.init( parameters ) )
-      {
-         std::cerr << "Solver failed to initialize." << std::endl;
-         return EXIT_FAILURE;
-      }
-      TNL_CHECK_CUDA_DEVICE;
-      std::cout << "-------------------------------------------------------------" << std::endl;
-      std::cout << "Starting solver..." << std::endl;
-      solver.run();
-   }
-   // A dim == 3 branch (the same flow with tnlGrid< 3, ... >) is disabled,
-   // as is the include of the 3D implementation in tnlFastSweepingMap.h.
-   else
-   {
-      std::cerr << "Unsupported number of dimensions: " << dim << "!" << std::endl;
-      return EXIT_FAILURE;
-   }
-
-   time( &stop );
-   std::cout << "Solver stopped..." << std::endl;
-   std::cout << std::endl;
-   std::cout << "Running time was: " << difftime( stop, start ) << " .... "
-             << ( std::clock() - start2 ) / (double)(CLOCKS_PER_SEC) << std::endl;
-   return EXIT_SUCCESS;
-}
-
-
diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h
deleted file mode 100644
index c568329ba2aa5fdb8fed303d43b25e73f210c014..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/***************************************************************************
-                          tnlFastSweepingMap.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING_H_
-#define TNLFASTSWEEPING_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <functions/tnlMeshFunction.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-#include <ctime>
-#ifdef HAVE_OPENMP
-#include <omp.h>
-#endif
-
-
-
-
-/**
- * Primary template of the fast-sweeping-map solver. It is intentionally
- * empty; the functionality lives in the tnlGrid specializations below.
- */
-template< typename Mesh, typename Real, typename Index >
-class tnlFastSweepingMap
-{
-};
-
-
-
-
-/**
- * Fast-sweeping-map solver specialized for 2D tnlGrid meshes.
- *
- * NOTE(review): MeshType is assembled from Real/Index, not from the
- * MeshReal/MeshIndex of the specialization pattern - confirm this is intended.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
-      typedef tnlGrid< 2, Real, Device, Index > MeshType;
-      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-      typedef typename MeshType::CoordinatesType CoordinatesType;
-
-      tnlFastSweepingMap();
-
-      /** Returns a textual name of this solver type. */
-      static String getType();
-
-      /** Sets the solver up from the parsed parameters; false signals failure. */
-      bool init( const Config::ParameterContainer& parameters );
-
-      bool initGrid();
-      bool run();
-
-      // Single-core signature of the per-cell update:
-      void updateValue(const Index i, const Index j);
-      // The parallel version uses this signature instead:
-//      void updateValue(const Index i, const Index j, DofVectorType* grid);
-
-      // Per-square setup routines, all taking the cell coordinates (i, j).
-      // NOTE(review): the four-digit suffix presumably encodes which of the
-      // square's four corner values are positive - confirm against the implementation.
-      void setupSquare1000(Index i, Index j);
-      void setupSquare1100(Index i, Index j);
-      void setupSquare1010(Index i, Index j);
-      void setupSquare1001(Index i, Index j);
-      void setupSquare1110(Index i, Index j);
-      void setupSquare1101(Index i, Index j);
-      void setupSquare1011(Index i, Index j);
-      void setupSquare1111(Index i, Index j);
-      void setupSquare0000(Index i, Index j);
-      void setupSquare0100(Index i, Index j);
-      void setupSquare0010(Index i, Index j);
-      void setupSquare0001(Index i, Index j);
-      void setupSquare0110(Index i, Index j);
-      void setupSquare0101(Index i, Index j);
-      void setupSquare0011(Index i, Index j);
-      void setupSquare0111(Index i, Index j);
-
-      Real fabsMin(const Real x, const Real y);
-
-   protected:
-
-      // Keep the declaration order of the data members: it fixes the object layout.
-      MeshType Mesh;
-
-      bool exactInput;
-
-      int something_changed;
-
-      tnlMeshFunction<MeshType> dofVector;
-      tnlMeshFunction<MeshType> dofVector2;
-      DofVectorType data;
-      DofVectorType map;
-
-      RealType h;
-
-      tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
-
-#ifdef HAVE_OPENMP
-//      omp_lock_t* gridLock;
-#endif
-};
-
-
-
-
-
-
-
-
-
-/**
- * Fast-sweeping-map solver specialized for 3D tnlGrid meshes.
- *
- * NOTE(review): MeshType is assembled from Real/Index, not from the
- * MeshReal/MeshIndex of the specialization pattern - confirm this is intended.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
-{
-   public:
-
-      typedef Real RealType;
-      typedef Device DeviceType;
-      typedef Index IndexType;
-      typedef tnlGrid< 3, Real, Device, Index > MeshType;
-      typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-      typedef typename MeshType::CoordinatesType CoordinatesType;
-
-      tnlFastSweepingMap();
-
-      /** Returns a textual name of this solver type. */
-      static String getType();
-
-      /** Sets the solver up from the parsed parameters; false signals failure. */
-      bool init( const Config::ParameterContainer& parameters );
-
-      bool initGrid();
-      bool run();
-
-      // Single-core signature of the per-cell update:
-      void updateValue(const Index i, const Index j, const Index k);
-      // The parallel version uses this signature instead:
-//      void updateValue(const Index i, const Index j, DofVectorType* grid);
-
-      Real fabsMin(const Real x, const Real y);
-
-   protected:
-
-      // Keep the declaration order of the data members: it fixes the object layout.
-      MeshType Mesh;
-
-      bool exactInput;
-
-      tnlMeshFunction<MeshType> dofVector;
-      tnlMeshFunction<MeshType> dofVector2;
-      DofVectorType data;
-
-      RealType h;
-
-      tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity;
-
-#ifdef HAVE_OPENMP
-//      omp_lock_t* gridLock;
-#endif
-};
-
-
-	//for single core version use this implementation:
-#include "tnlFastSweepingMap2D_impl.h"
-	//for parallel version use this one instead:
-// #include "tnlFastSweepingMap2D_openMP_impl.h"
-
-//											#include "tnlFastSweepingMap3D_impl.h"
-
-#endif /* TNLFASTSWEEPING_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
deleted file mode 100644
index d02b8d6c5d40f8f581e160201c952777ba15aefe..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h
+++ /dev/null
@@ -1,1051 +0,0 @@
-/***************************************************************************
-                          tnlFastSweepingMap2D_CUDA_v4_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweepingMap.h"
-
-#define MAP_SOLVER_MAX_VALUE 3
-
-/**
- * Returns whichever argument has the smaller absolute value, sign included.
- * x is returned when Min( |x|, |y| ) compares equal to |x|, otherwise y.
- */
-__device__
-double fabsMin( double x, double y)
-{
-	const double magnitudeX = abs(x);
-	return ( Min(magnitudeX, abs(y)) == magnitudeX ) ? x : y;
-}
-
-/**
- * Atomically replaces *address with fabsMin( *address, val ).
- *
- * Built as a compare-and-swap retry loop on the 64-bit pattern of the double:
- * the swap is retried until the value it was computed from is still the one
- * stored at address.
- *
- * @param address location of the double to update
- * @param val     candidate value
- * @return the value stored at address before the successful swap
- */
-__device__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* const bits = (unsigned long long int*)address;
-	unsigned long long int seen = *bits;
-	unsigned long long int expected;
-
-	for(;;)
-	{
-		expected = seen;
-		seen = atomicCAS(bits, expected,
-		                 __double_as_longlong( fabsMin(__longlong_as_double(expected),val) ));
-		// atomicCAS returns what was stored; equality means our swap went through.
-		if(expected == seen)
-			break;
-	}
-	return __longlong_as_double(seen);
-}
-
-/**
- * Builds the type name "tnlFastSweepingMap< <mesh>, <real>, <index> >"
- * from the names reported by MeshType::getType() and ::getType< T >().
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String
-tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	return String( "tnlFastSweepingMap< " )
-	       + MeshType::getType() + ", "
-	       + ::getType< Real >() + ", "
-	       + ::getType< Index >() + " >";
-}
-
-/**
- * Default constructor; the only explicit initialization binds dofVector to Mesh.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap()
-	: dofVector( Mesh )
-{
-}
-
-
-/**
- * Prepares the solver for run().
- *
- * Loads the mesh ("mesh"), the initial condition ("initial-condition") and
- * the gradient map ("map") from the files named in the parameters, reads the
- * "exact-input" switch, and - when built with HAVE_CUDA - allocates the
- * device buffers, copies the host data and this solver object to the device
- * and launches initCUDA over the whole grid.
- *
- * @param parameters parsed configuration parameters
- * @return true on success; false when the mesh, the initial condition or
- *         the map cannot be loaded
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-	if( ! Mesh.load( meshFile ) )
-	{
-		std::cerr << "I am not able to load the mesh from the file " << meshFile << "." << std::endl;
-		return false;
-	}
-
-	const String& initialCondition = parameters.getParameter< String >( "initial-condition" );
-	if( ! dofVector.load( initialCondition ) )
-	{
-		// Name the file that actually failed, not the mesh file.
-		std::cerr << "I am not able to load the initial condition from the file " << initialCondition << "." << std::endl;
-		return false;
-	}
-
-	const String& mapFile = parameters.getParameter< String >( "map" );
-	if( ! this->map.load( mapFile ) )
-	{
-		// The kernels read map_cuda for every cell, so a missing map is fatal.
-		std::cerr << "Failed to load map file : " << mapFile << std::endl;
-		return false;
-	}
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	//Entity.refresh();
-	counter = 0;
-
-	// Anything other than "no" switches exact input on.
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-	exactInput = ! ( exact_input == "no" );
-
-#ifdef HAVE_CUDA
-
-	// Device copies of the initial condition: cudaDofVector holds the input,
-	// cudaDofVector2 is the working array filled by initCUDA.
-	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(map_cuda), this->map.getSize()*sizeof(double));
-	cudaMemcpy(map_cuda, this->map.getData(), this->map.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(changed), sizeof(int));
-	// counter == 0 --> the device-side "changed" flag starts at 0
-	cudaMemcpy(changed, &counter, sizeof(int), cudaMemcpyHostToDevice);
-
-	// The device copy of the solver must be made after the pointers above are set.
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-	// One thread per cell in 16x16 blocks. Each axis gets its own block count
-	// so that non-square meshes are fully covered; initCUDA discards the
-	// threads that fall outside the mesh.
-	const int nx = Mesh.getDimensions().x();
-	const int ny = Mesh.getDimensions().y();
-	dim3 threadsPerBlock( 16, 16 );
-	dim3 numBlocks( nx/16 + 1, ny/16 + 1 );
-
-	// The launch stays inside the guard: it uses buffers that only exist here.
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-#endif
-
-	return true;
-}
-
-
-
-
-
-/**
- * Runs sets of sweeps on the device until one full set changes no value,
- * then copies the result back to the host, releases the device buffers and
- * saves the solution to "u-00001.tnl".
- *
- * Every device buffer allocated by init() is freed here, so run() must not
- * be called again without a new init().
- *
- * @return always true
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-	// Four blocks, one per sweep direction handled in runCUDA.
-	dim3 threadsPerBlock(1, 1024);
-	dim3 numBlocks(4,1);
-
-	int run = 1;
-	int zero = 0;
-	int cntr = 0;
-
-	while(run != 0)
-	{
-		// Reset the device-side flag before each set of sweeps.
-		cudaMemcpy(this->changed, &zero, sizeof(int), cudaMemcpyHostToDevice);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0, this->changed);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		// A non-zero flag means some value changed, so another set is needed.
-		cudaMemcpy(&run, this->changed,sizeof(int), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		cntr++;
-		std::cout << "Finished set of sweeps #" << cntr << "           " << run <<std::endl;
-	}
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	//data.setLike(dofVector.getData());
-	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-
-	// Release everything init() allocated, including the map and the flag.
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(map_cuda);
-	cudaFree(changed);
-	cudaFree(cudaSolver);
-
-	//data.save("u-00001.tnl");
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-/**
- * Recomputes the value of cell (i, j) in cudaDofVector2 from its neighbours.
- *
- * Where the map is zero the cell is only capped with MAP_SOLVER_MAX_VALUE.
- * Otherwise a candidate is computed from the smaller-magnitude neighbour
- * along each axis and merged in with atomicFabsMin; *something_changed is
- * raised to 1 when the candidate has a smaller magnitude than the value
- * read at the start.
- *
- * @param i                 x coordinate of the cell
- * @param j                 y coordinate of the cell
- * @param something_changed flag that is set to 1 when the cell improved
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index* something_changed)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	const int cell = Entity.getIndex();
-
-	// Zero map value: only cap the cell and leave.
-	if( ! (map_cuda[cell] != 0.0) )
-	{
-		atomicFabsMin(&(cudaDofVector2[cell]), MAP_SOLVER_MAX_VALUE);
-		return;
-	}
-
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real value = cudaDofVector2[cell];
-	Real im = abs(1.0/map_cuda[cell]);
-	Real a,b, tmp;
-
-	// x axis: on the boundary only the inner neighbour exists.
-	if( i == 0 )
-		a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-	else
-		a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-		             cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-
-	// y axis: same rule.
-	if( j == 0 )
-		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-	else
-		b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-		             cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-
-	// One-sided update when the neighbours differ by at least im*h,
-	// two-sided (quadratic) update otherwise.
-	if(abs(a-b) >= im*h)
-		tmp = fabsMin(a,b) + sign(value)*im*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) );
-
-	atomicFabsMin(&(cudaDofVector2[cell]), tmp);
-
-	if(abs(value)-abs(tmp) > 0.0)
-		atomicMax(something_changed,1);
-}
-
-
-/**
- * Initializes the working array cudaDofVector2 for the cell owned by the
- * calling thread (coordinates taken from the thread and block indices).
- *
- * Cells whose input magnitude is below 1.01*h keep the input value, divided
- * by the map value when that is non-zero; every other cell is set to
- * INT_MAX carrying the sign of the input.
- *
- * A sign-pattern dispatch to the setupSquareXXXX routines (one call per
- * combination of signs at (i,j), (i+1,j), (i,j+1), (i+1,j+1)) used to follow
- * here in commented-out form; it is not executed.
- *
- * @return always true
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int i = threadIdx.x + blockDim.x*blockIdx.x;
-	int j = blockDim.y*blockIdx.y + threadIdx.y;
-
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	// NOTE(review): only the disabled setupSquare dispatch used this getter.
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	int gid = Entity.getIndex();
-
-	if(abs(cudaDofVector[gid]) < 1.01*h)
-	{
-		// Close to the interface: take the input over, scaled by the map.
-		cudaDofVector2[gid] = cudaDofVector[gid];
-		if(map_cuda[gid] != 0.0)
-			cudaDofVector2[gid] /=map_cuda[gid];
-	}
-	else
-	{
-		// Far from the interface: start from a huge value of the right sign.
-		cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]);
-	}
-
-	return true;
-}
-
-
-/**
- * Member counterpart of the free fabsMin: returns whichever argument has the
- * smaller absolute value, sign included. x is returned when
- * Min( |x|, |y| ) compares equal to |x|, otherwise y.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	const Real magnitudeX = abs(x);
-	return ( Min(magnitudeX, abs(y)) == magnitudeX ) ? x : y;
-}
-
-
-
-/**
- * Performs one set of sweeps; each of the first four blocks sweeps the grid
- * in its own direction:
- *   block 0: x ascending,  rows ascending
- *   block 1: x descending, rows ascending
- *   block 2: x ascending,  rows descending
- *   block 3: x descending, rows descending
- * Blocks with a higher index do not sweep.
- *
- * Inside a block, thread t starts on row t (counted from the block's first
- * row) one step after thread t-1, walks the row cell by cell and then jumps
- * blockDim.y rows further. All threads synchronize after every step.
- *
- * @param solver  device copy of the solver
- * @param sweep   not used
- * @param i       not used
- * @param changed device flag, raised to the block's "something changed" value
- */
-__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed)
-{
-	__shared__ int something_changed;
-	if(threadIdx.x+threadIdx.y == 0)
-		something_changed = 0;
-
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-
-	__syncthreads();
-
-	if(blockIdx.x < 4)
-	{
-		// Direction of this block's sweep.
-		const bool xAscending = (blockIdx.x == 0 || blockIdx.x == 2);
-		const bool rowsAscending = (blockIdx.x < 2);
-
-		const int firstX = xAscending ? 0 : n-1;
-		const int pastLastX = xAscending ? n : -1;
-
-		int gx = firstX;
-		int gy;
-		if(rowsAscending)
-			gy = threadIdx.y;
-		else
-			gy = n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			// Thread t becomes active at step t; rows outside the grid are skipped.
-			if(threadIdx.y  < k+1 && gy > -1 && gy < n)
-			{
-				solver->updateValue(gx,gy,&something_changed);
-
-				if(xAscending)
-					gx++;
-				else
-					gx--;
-
-				if(gx==pastLastX)
-				{
-					// Row finished: restart x and move on to this thread's next row.
-					gx=firstX;
-					if(rowsAscending)
-						gy+=blockDim.y;
-					else
-						gy-=blockDim.y;
-				}
-			}
-
-			__syncthreads();
-		}
-	}
-
-	if(threadIdx.x+threadIdx.y == 0)
-		atomicMax(changed, something_changed);
-}
-
-
-__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-
-
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-}
-#endif
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
deleted file mode 100644
index 4bd9e17c5626c7fcbbe0b747a2011d0d74ac9809..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h
+++ /dev/null
@@ -1,823 +0,0 @@
-/***************************************************************************
-                          tnlFastSweepingMap2D_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-
-#define MAP_SOLVER_MAX_VALUE 3
-
-
-#include "tnlFastSweepingMap.h"
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweepingMap< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap()
-:Entity(Mesh),
- dofVector(Mesh),
- dofVector2(Mesh)
-{
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-	dofVector2.load(initialCondition);
-
-	const String& mapFile = parameters.getParameter <String>("map");
-	if(! this->map.load( mapFile ))
-		cout << "Failed to load map file : " << mapFile <<std::endl;
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	Entity.refresh();
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-	cout << "a" <<std::endl;
-
-	something_changed = 1;
-	return initGrid();
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
-	{
-		dofVector2[i]=INT_MAX*sign(dofVector[i]);
-
-		if(abs(dofVector[i]) < 1.01*h)
-		{
-			dofVector2[i] = dofVector[i];
-			if(map[i] != 0.0)
-				dofVector2[i] /= map[i];
-		}
-	}
-
-//	for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++)
-//			{
-//			this->Entity.setCoordinates(CoordinatesType(i,j));
-//			this->Entity.refresh();
-//			neighborEntities.refresh(Mesh,Entity.getIndex());
-//
-//				if(dofVector[this->Entity.getIndex()] > 0)
-//				{
-//					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-//					{
-//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare1111(i,j);
-//							else
-//								setupSquare1110(i,j);
-//						}
-//						else
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare1101(i,j);
-//							else
-//								setupSquare1100(i,j);
-//						}
-//					}
-//					else
-//					{
-//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare1011(i,j);
-//							else
-//								setupSquare1010(i,j);
-//						}
-//						else
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare1001(i,j);
-//							else
-//								setupSquare1000(i,j);
-//						}
-//					}
-//				}
-//				else
-//				{
-//					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-//					{
-//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare0111(i,j);
-//							else
-//								setupSquare0110(i,j);
-//						}
-//						else
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare0101(i,j);
-//							else
-//								setupSquare0100(i,j);
-//						}
-//					}
-//					else
-//					{
-//						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare0011(i,j);
-//							else
-//								setupSquare0010(i,j);
-//						}
-//						else
-//						{
-//							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//								setupSquare0001(i,j);
-//							else
-//								setupSquare0000(i,j);
-//						}
-//					}
-//				}
-//
-//			}
-//	}
-	cout << "a" <<std::endl;
-
-	//data.setLike(dofVector2.getData());
-	//data=dofVector2.getData();
-	//cout << data.getType() <<std::endl;
-	dofVector2.save("u-00000.tnl");
-	//dofVector2.getData().save("u-00000.tnl");
-
-	return true;
-}
-
-
-
-/*
- * Runs the sweeping iterations on the 2D grid.
- *
- * While something_changed is non-zero, the flag is cleared and updateValue()
- * is applied to every (i,j) in four passes that differ only in traversal
- * direction: (+i,+j), (-i,+j), (-i,-j), (+i,-j). After each set of four
- * passes the set counter and the flag are printed. Once the loop ends,
- * dofVector2 is saved to "u-00001.tnl" and true is returned.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-	int sweepSets = 0;
-
-	while(something_changed != 0)
-	{
-		/* updateValue() raises the flag again if any cell still changes */
-		something_changed = 0;
-
-		/* pass 1: i ascending, j ascending */
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-				updateValue(i,j);
-
-		/* pass 2: i descending, j ascending */
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-				updateValue(i,j);
-
-		/* pass 3: i descending, j descending */
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-				updateValue(i,j);
-
-		/* pass 4: i ascending, j descending */
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-				updateValue(i,j);
-
-		++sweepSets;
-		cout << "Finished set of sweeps #" << sweepSets << "           " << something_changed <<std::endl;
-	}
-
-	dofVector2.save("u-00001.tnl");
-
-	return true;
-}
-
-
-/*
- * Performs one update of dofVector2 at cell (i,j).
- *
- * If map is zero at the cell, dofVector2 there is set to MAP_SOLVER_MAX_VALUE
- * and nothing else happens. Otherwise:
- *   - im = |1/map[cell]|;
- *   - a is the smaller-magnitude value of the two i-neighbours and b the
- *     smaller-magnitude value of the two j-neighbours (on a boundary only the
- *     single existing neighbour is read);
- *   - a candidate tmp is computed from a, b and im*h, using the sign of the
- *     current value;
- *   - something_changed is set to 1 when |tmp| is smaller than the current
- *     magnitude, and the smaller-magnitude of (value, tmp) is stored.
- *
- * Magnitudes are taken with fabs (not unqualified abs) so that a
- * floating-point Real can never be routed to the integer abs(int) overload.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-
-	/* cells with a zero map entry are pinned to the maximum value */
-	if(map[Entity.getIndex()] == 0.0)
-	{
-		dofVector2[Entity.getIndex()] = MAP_SOLVER_MAX_VALUE;
-		return;
-	}
-
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	const Real value = dofVector2[Entity.getIndex()];
-	const Real im = fabs(1.0/map[Entity.getIndex()]);
-	Real a, b, tmp;
-
-	/* neighbour along i: one-sided on the boundary, min-magnitude inside */
-	if( i == 0 )
-		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-	else
-		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-				 dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-
-	/* neighbour along j: same rule */
-	if( j == 0 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-	else
-		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-
-	/* the sqrt branch is only taken when |a-b| < im*h, so its argument stays positive */
-	if(fabs(a-b) >= im*h)
-		tmp = fabsMin(a,b) + sign(value)*im*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) );
-
-	if(fabs(value)-fabs(tmp) > 0.0)
-		something_changed = 1;
-
-	dofVector2[Entity.getIndex()] = fabsMin(value, tmp);
-}
-
-
-/*
- * Returns whichever of x and y has the smaller absolute value, with its
- * original sign. x is returned when Min(|x|,|y|) compares equal to |x|
- * (this includes the tie |x| == |y|); otherwise y is returned.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	const Real absX = fabs(x);
-	const Real absY = fabs(y);
-
-	return ( Min(absX,absY) == absX ) ? x : y;
-}
-
-
-
-/*
- * Currently a no-op: the whole body is commented out, so calling this
- * function reads and writes nothing. The disabled code would have merged
- * INT_MAX into dofVector2 at (i,j), <0,1>, <1,1> and <1,0> through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-//	this->Entity.setCoordinates(CoordinatesType(i,j));
-//	this->Entity.refresh();
-//	auto neighborEntities =  Entity.getNeighborEntities();
-//	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-/*
- * Currently a no-op: the whole body is commented out, so calling this
- * function reads and writes nothing. The disabled code would have merged
- * -INT_MAX into dofVector2 at (i,j), <0,1>, <1,1> and <1,0> through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-//	this->Entity.setCoordinates(CoordinatesType(i,j));
-//	this->Entity.refresh();
-//	auto neighborEntities =  Entity.getNeighborEntities();
-//	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
-//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner <1,1>
- * and its neighbours <1,0> and <0,1>. They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner <1,1> gets the
- * negative sign, the other three the positive one.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): dofVector == 0 at <1,1> gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c11]/(dofVector[c10]-dofVector[c11]));
-	const Real be = fabs(dofVector[c11]/(dofVector[c01]-dofVector[c11]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(fabs(a*1+b*1+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(fabs(a*0+b*1+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(-fabs(a*0+b*0+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(fabs(a*1+b*0+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner <0,1>
- * and its neighbours <1,1> and (i,j). They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner <1,0> gets the
- * negative sign, the other three the positive one.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): in the commented-out dispatch earlier in this file this
- * function is selected when dofVector at <0,1> is the non-positive corner,
- * yet the negative value is written to <1,0> -- confirm the intended corner.
- * NOTE(review): dofVector == 0 at <0,1> gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c01]/(dofVector[c11]-dofVector[c01]));
-	const Real be = fabs(dofVector[c01]/(dofVector[c00]-dofVector[c01]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(fabs(a*0+b*1+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(fabs(a*0+b*0+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(fabs(a*1+b*0+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(-fabs(a*1+b*1+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner <1,0>
- * and its neighbours (i,j) and <1,1>. They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner <0,1> gets the
- * negative sign, the other three the positive one.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): in the commented-out dispatch earlier in this file this
- * function is selected when dofVector at <1,0> is the non-positive corner,
- * yet the negative value is written to <0,1> -- confirm the intended corner.
- * NOTE(review): dofVector == 0 at <1,0> gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c10]/(dofVector[c00]-dofVector[c10]));
-	const Real be = fabs(dofVector[c10]/(dofVector[c11]-dofVector[c10]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(fabs(a*1+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(-fabs(a*1+b*1+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(fabs(a*0+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(fabs(a*0+b*0+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner (i,j)
- * and its neighbours <0,1> and <1,0>. They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner (i,j) gets the
- * negative sign, the other three the positive one.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): dofVector == 0 at (i,j) gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c00]/(dofVector[c01]-dofVector[c00]));
-	const Real be = fabs(dofVector[c00]/(dofVector[c10]-dofVector[c00]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(-fabs(a*0+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(fabs(a*1+b*0+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(fabs(a*1+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(fabs(a*0+b*1+c)*s,dofVector2[c10]);
-}
-
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner <1,1>
- * and its neighbours <1,0> and <0,1>. They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner <1,1> gets the
- * positive sign, the other three the negative one.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): dofVector == 0 at <1,1> gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c11]/(dofVector[c10]-dofVector[c11]));
-	const Real be = fabs(dofVector[c11]/(dofVector[c01]-dofVector[c11]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(-fabs(a*1+b*1+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(-fabs(a*0+b*1+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(fabs(a*0+b*0+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(-fabs(a*1+b*0+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner <0,1>
- * and its neighbours <1,1> and (i,j). They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner <1,0> gets the
- * positive sign, the other three the negative one.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): in the commented-out dispatch earlier in this file this
- * function is selected when dofVector at <0,1> is the only positive corner,
- * yet the positive value is written to <1,0> -- confirm the intended corner.
- * NOTE(review): dofVector == 0 at <0,1> gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c01]/(dofVector[c11]-dofVector[c01]));
-	const Real be = fabs(dofVector[c01]/(dofVector[c00]-dofVector[c01]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(-fabs(a*0+b*1+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(-fabs(a*0+b*0+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(-fabs(a*1+b*0+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(fabs(a*1+b*1+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner <1,0>
- * and its neighbours (i,j) and <1,1>. They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner <0,1> gets the
- * positive sign, the other three the negative one.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): in the commented-out dispatch earlier in this file this
- * function is selected when dofVector at <1,0> is the only positive corner,
- * yet the positive value is written to <0,1> -- confirm the intended corner.
- * NOTE(review): dofVector == 0 at <1,0> gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c10]/(dofVector[c00]-dofVector[c10]));
-	const Real be = fabs(dofVector[c10]/(dofVector[c11]-dofVector[c10]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(-fabs(a*1+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(fabs(a*1+b*1+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(-fabs(a*0+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(-fabs(a*0+b*0+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al and be are |u/(u' - u)| ratios built from dofVector at corner (i,j)
- * and its neighbours <0,1> and <1,0>. They give the coefficients a = be/al,
- * b = 1, c = -be and the scale s = h/sqrt(a*a+b*b). Every corner receives
- * +/-|a*x+b*y+c|*s, merged into dofVector2 through fabsMin so that an entry
- * of smaller magnitude already stored there is kept. Corner (i,j) gets the
- * positive sign, the other three the negative one.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- *
- * NOTE(review): dofVector == 0 at (i,j) gives al == be == 0 and a = 0/0;
- * this is not guarded.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c00]/(dofVector[c01]-dofVector[c00]));
-	const Real be = fabs(dofVector[c00]/(dofVector[c10]-dofVector[c00]));
-
-	const Real a = be/al;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(fabs(a*0+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(-fabs(a*1+b*0+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(-fabs(a*1+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(-fabs(a*0+b*1+c)*s,dofVector2[c10]);
-}
-
-
-
-
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al = |u/(u' - u)| along the edge (i,j) -> <0,1> and be is the same ratio
- * along the edge <1,0> -> <1,1>, both read from dofVector. They give the
- * coefficients a = al-be, b = 1, c = -al and the scale s = h/sqrt(a*a+b*b).
- * Every corner receives +/-|a*x+b*y+c|*s, merged into dofVector2 through
- * fabsMin so that an entry of smaller magnitude already stored there is
- * kept. (i,j) and <1,0> get the positive sign, <0,1> and <1,1> the negative.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c00]/(dofVector[c01]-dofVector[c00]));
-	const Real be = fabs(dofVector[c10]/(dofVector[c11]-dofVector[c10]));
-
-	const Real a = al-be;
-	const Real b = 1.0;
-	const Real c = -al;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(fabs(a*0+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(-fabs(a*0+b*1+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(-fabs(a*1+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(fabs(a*1+b*0+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al = |u/(u' - u)| along the edge (i,j) -> <1,0> and be is the same ratio
- * along the edge <0,1> -> <1,1>, both read from dofVector. They give the
- * coefficients a = al-be, b = 1, c = -be and the scale s = h/sqrt(a*a+b*b).
- * Every corner receives +/-|a*x+b*y+c|*s, merged into dofVector2 through
- * fabsMin so that an entry of smaller magnitude already stored there is
- * kept. (i,j) and <0,1> get the positive sign, <1,0> and <1,1> the negative.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c00]/(dofVector[c10]-dofVector[c00]));
-	const Real be = fabs(dofVector[c01]/(dofVector[c11]-dofVector[c01]));
-
-	const Real a = al-be;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(fabs(a*0+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(fabs(a*1+b*0+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(-fabs(a*1+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(-fabs(a*0+b*1+c)*s,dofVector2[c10]);
-}
-
-/*
- * For each of the four cells (i,j), <0,1>, <1,1> and <1,0>, stores in
- * dofVector2 whichever of the dofVector value and the current dofVector2
- * value has the smaller magnitude (as selected by fabsMin).
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-
-	dofVector2[c00] = fabsMin(dofVector[c00], dofVector2[c00]);
-	dofVector2[c01] = fabsMin(dofVector[c01], dofVector2[c01]);
-	dofVector2[c11] = fabsMin(dofVector[c11], dofVector2[c11]);
-	dofVector2[c10] = fabsMin(dofVector[c10], dofVector2[c10]);
-}
-
-
-
-
-
-
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al = |u/(u' - u)| along the edge (i,j) -> <0,1> and be is the same ratio
- * along the edge <1,0> -> <1,1>, both read from dofVector. They give the
- * coefficients a = al-be, b = 1, c = -al and the scale s = h/sqrt(a*a+b*b).
- * Every corner receives +/-|a*x+b*y+c|*s, merged into dofVector2 through
- * fabsMin so that an entry of smaller magnitude already stored there is
- * kept. (i,j) and <1,0> get the negative sign, <0,1> and <1,1> the positive.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c00]/(dofVector[c01]-dofVector[c00]));
-	const Real be = fabs(dofVector[c10]/(dofVector[c11]-dofVector[c10]));
-
-	const Real a = al-be;
-	const Real b = 1.0;
-	const Real c = -al;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(-fabs(a*0+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(fabs(a*0+b*1+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(fabs(a*1+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(-fabs(a*1+b*0+c)*s,dofVector2[c10]);
-}
-
-/*
- * Seeds dofVector2 on the 2x2 block of cells (i,j), <1,0>, <0,1>, <1,1>.
- *
- * al = |u/(u' - u)| along the edge (i,j) -> <1,0> and be is the same ratio
- * along the edge <0,1> -> <1,1>, both read from dofVector. They give the
- * coefficients a = al-be, b = 1, c = -be and the scale s = h/sqrt(a*a+b*b).
- * Every corner receives +/-|a*x+b*y+c|*s, merged into dofVector2 through
- * fabsMin so that an entry of smaller magnitude already stored there is
- * kept. (i,j) and <0,1> get the negative sign, <1,0> and <1,1> the positive.
- *
- * The name presumably encodes the sign pattern of dofVector at
- * (i,j), <1,0>, <0,1>, <1,1> (1 = positive), as in the commented-out
- * dispatch earlier in this file -- confirm.
- *
- * fabs is used instead of unqualified abs so that a floating-point Real can
- * never be routed to the integer abs(int) overload.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-
-	const Real al = fabs(dofVector[c00]/(dofVector[c10]-dofVector[c00]));
-	const Real be = fabs(dofVector[c01]/(dofVector[c11]-dofVector[c01]));
-
-	const Real a = al-be;
-	const Real b = 1.0;
-	const Real c = -be;
-	const Real s = h/sqrt(a*a+b*b);
-
-	/* a*x+b*y+c is spelled out so each corner's local (x,y) stays visible */
-	dofVector2[c00]=fabsMin(-fabs(a*0+b*0+c)*s,dofVector2[c00]);
-	dofVector2[c01]=fabsMin(-fabs(a*1+b*0+c)*s,dofVector2[c01]);
-	dofVector2[c11]=fabsMin(fabs(a*1+b*1+c)*s,dofVector2[c11]);
-	dofVector2[c10]=fabsMin(fabs(a*0+b*1+c)*s,dofVector2[c10]);
-}
-
-/*
- * For each of the four cells (i,j), <0,1>, <1,1> and <1,0>, stores in
- * dofVector2 whichever of the dofVector value and the current dofVector2
- * value has the smaller magnitude (as selected by fabsMin).
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-
-	/* indices of the four corners of the block */
-	const auto c00 = Entity.getIndex();
-	const auto c01 = neighborEntities.template getEntityIndex< 0,  1 >();
-	const auto c11 = neighborEntities.template getEntityIndex< 1,  1 >();
-	const auto c10 = neighborEntities.template getEntityIndex< 1,  0 >();
-
-	dofVector2[c00] = fabsMin(dofVector[c00], dofVector2[c00]);
-	dofVector2[c01] = fabsMin(dofVector[c01], dofVector2[c01]);
-	dofVector2[c11] = fabsMin(dofVector[c11], dofVector2[c11]);
-	dofVector2[c10] = fabsMin(dofVector[c10], dofVector2[c10]);
-}
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
deleted file mode 100644
index a23057e78c745e74467db4c4190d6f217024bc5a..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/***************************************************************************
-                          tnlFastSweepingMap_CUDA.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING_H_
-#define TNLFASTSWEEPING_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-
-#include <functions/tnlMeshFunction.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-#include <ctime>
-
-
-
-
-
-/*
- * Primary template: intentionally empty. The usable interface is provided
- * by the partial specializations for tnlGrid< 2, ... > and tnlGrid< 3, ... >
- * declared below in this header.
- */
-template< typename Mesh,
-		  typename Real,
-		  typename Index >
-class tnlFastSweepingMap
-{};
-
-
-
-
-/*
- * Partial specialization of tnlFastSweepingMap for a 2D tnlGrid.
- *
- * Host-side entry points are getType(), init() and run(); the device-side
- * helpers and the raw device pointers are only declared when HAVE_CUDA is
- * defined.
- *
- * NOTE(review): MeshType is built from Real/Index while the specialization
- * pattern uses MeshReal/MeshIndex -- the two only coincide when those types
- * are equal; confirm this is intended.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-	tnlFastSweepingMap();
-
-	// Host-side interface.
-	__host__ static String getType();
-	__host__ bool init( const Config::ParameterContainer& parameters );
-	__host__ bool run();
-
-#ifdef HAVE_CUDA
-	// Device-side routines (two updateValue overloads: one reporting through
-	// something_changed, one taking a sharedMem buffer).
-	__device__ bool initGrid();
-	__device__ void updateValue(const Index i, const Index j, Index* something_changed);
-	__device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3);
-	__device__ Real fabsMin(const Real x, const Real y);
-
-	// Raw pointers; presumably device-side copies of the solver and its
-	// vectors -- confirm against the included implementation header.
-	tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
-	double* cudaDofVector;
-	double* cudaDofVector2;
-	double* map_cuda;
-	int counter;
-	int* changed;
-	// One initialization routine per setupSquareXXXX case.
-	__device__ void setupSquare1000(Index i, Index j);
-	__device__ void setupSquare1100(Index i, Index j);
-	__device__ void setupSquare1010(Index i, Index j);
-	__device__ void setupSquare1001(Index i, Index j);
-	__device__ void setupSquare1110(Index i, Index j);
-	__device__ void setupSquare1101(Index i, Index j);
-	__device__ void setupSquare1011(Index i, Index j);
-	__device__ void setupSquare1111(Index i, Index j);
-	__device__ void setupSquare0000(Index i, Index j);
-	__device__ void setupSquare0100(Index i, Index j);
-	__device__ void setupSquare0010(Index i, Index j);
-	__device__ void setupSquare0001(Index i, Index j);
-	__device__ void setupSquare0110(Index i, Index j);
-	__device__ void setupSquare0101(Index i, Index j);
-	__device__ void setupSquare0011(Index i, Index j);
-	__device__ void setupSquare0111(Index i, Index j);
-#endif
-
-	// The grid; public (declared before the protected section).
-	MeshType Mesh;
-
-protected:
-
-
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector;
-	DofVectorType data, map;
-
-
-	// presumably the grid spacing -- confirm where it is assigned
-	RealType h;
-
-
-};
-
-
-
-
-
-
-
-
-
-/*
- * Partial specialization of tnlFastSweepingMap for a 3D tnlGrid.
- *
- * Declares the same host-side entry points as the 2D specialization
- * (getType(), init(), run()) but no constructor, no map vector and no
- * setupSquare routines. Device-side members exist only under HAVE_CUDA.
- *
- * NOTE(review): MeshType is built from Real/Index while the specialization
- * pattern uses MeshReal/MeshIndex -- confirm this is intended.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-
-
-	// Host-side interface.
-	__host__ static String getType();
-	__host__ bool init( const Config::ParameterContainer& parameters );
-	__host__ bool run();
-
-#ifdef HAVE_CUDA
-	// Device-side routines, indexed by (i,j,k).
-	__device__ bool initGrid(int i, int j, int k);
-	__device__ void updateValue(const Index i, const Index j, const Index k);
-	__device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3);
-	__device__ Real fabsMin(const Real x, const Real y);
-
-	// Raw pointers; presumably device-side copies of the solver and its
-	// vectors -- confirm against the implementation.
-	tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
-	double* cudaDofVector;
-	double* cudaDofVector2;
-	int counter;
-#endif
-
-	// The grid; public (declared before the protected section).
-	MeshType Mesh;
-
-protected:
-
-
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector;
-	DofVectorType data;
-
-	// presumably the grid spacing -- confirm where it is assigned
-	RealType h;
-
-
-};
-
-
-
-
-
-
-
-#ifdef HAVE_CUDA
-// CUDA kernel declarations for the 2D solver instantiated with
-// double / TNL::Devices::Host / int. The 3D counterparts are commented out.
-// Definitions are expected in the implementation header included below.
-//template<int sweep_t>
-__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed);
-//__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-
-__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-//__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver);
-#endif
-
-/* Various implementations... choose one. */
-//#include "tnlFastSweepingMap2D_CUDA_impl.h"
-//#include "tnlFastSweepingMap2D_CUDA_v2_impl.h"
-//#include "tnlFastSweepingMap2D_CUDA_v3_impl.h"
-#include "tnlFastSweepingMap2D_CUDA_v4_impl.h"
-//#include "tnlFastSweepingMap2D_CUDA_v5_impl.h"
-
-
-//															#include "tnlFastSweepingMap3D_CUDA_impl.h"
-
-#endif /* TNLFASTSWEEPING_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping/CMakeLists.txt
deleted file mode 100644
index 1a23d646a43090c0a63216b43c317c14ab0903d3..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/CMakeLists.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-set( tnl_fast_sweeping_SOURCES
-#     MainBuildConfig.h
-#     tnlFastSweeping2D_impl.h
-#     tnlFastSweeping.h
-#     fastSweepingConfig.h 
-     main.cpp)
-
-
-IF(  BUILD_CUDA ) 
-	CUDA_ADD_EXECUTABLE(fast-sweeping main.cu)
-ELSE(  BUILD_CUDA )                
-	ADD_EXECUTABLE(fast-sweeping main.cpp)
-ENDIF( BUILD_CUDA )
-target_link_libraries (fast-sweeping tnl )
-
-
-INSTALL( TARGETS fast-sweeping
-         RUNTIME DESTINATION bin
-         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
-        
-#INSTALL( FILES ${tnl_fast_sweeping_SOURCES}
-#         DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/fast-sweeping )
diff --git a/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h
deleted file mode 100644
index ed3d686eb99379af1589d734eac9b5812cccdedf..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/***************************************************************************
-                          MainBuildConfig.h  -  description
-                             -------------------
-    begin                : Jul 7, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef MAINBUILDCONFIG_H_
-#define MAINBUILDCONFIG_H_
-
-#include <solvers/tnlBuildConfigTags.h>
-
-class MainBuildConfig
-{
-   public:
-
-      static void print() {std::cerr << "MainBuildConfig" <<std::endl; }
-};
-
-/****
- * Turn off support for float and long double.
- */
-template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
-template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
-
-/****
- * Turn off support for short int and long int indexing.
- */
-template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
-template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
-
-/****
- * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
- */
-template< int Dimensions, typename Real, typename Device, typename Index >
-   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
-      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
-                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
-                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
-                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
-
-/****
- * Please, chose your preferred time discretisation  here.
- */
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
-
-/****
- * Only the Runge-Kutta-Merson solver is enabled by default.
- */
-template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
-
-#endif /* MAINBUILDCONFIG_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h b/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h
deleted file mode 100644
index 3df2c1e889050448fc07baf7dcd0e32feab3f778..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/***************************************************************************
-                          fastSweepingConfig.h  -  description
-                             -------------------
-    begin                : Oct 15, 2015
-    copyright            : (C) 2015 by Tomas Sobotik
-    email                :
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef FASTSWEEPINGCONFIG_H_
-#define FASTSWEEPINGCONFIG_H_
-
-#include <config/tnlConfigDescription.h>
-
-template< typename ConfigTag >
-class fastSweepingConfig
-{
-   public:
-      static void configSetup( tnlConfigDescription& config )
-      {
-         config.addDelimiter( "Parallel Eikonal solver settings:" );
-         config.addEntry        < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" );
-         config.addRequiredEntry        < String > ( "initial-condition", "Initial condition for solver");
-         config.addRequiredEntry        < int > ( "dim", "Dimension of problem.");
-         config.addEntry       < String > ( "mesh", "Name of mesh.", "mesh.tnl" );
-         config.addEntry       < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" );
-      }
-};
-
-#endif /* FASTSWEEPINGCONFIG_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/main.cpp b/src/TNL/Legacy/fast-sweeping/main.cpp
deleted file mode 100644
index 8849008ff630db0400a6d7d98e789099e5fbb5d9..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/main.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/fast-sweeping/main.cu b/src/TNL/Legacy/fast-sweeping/main.cu
deleted file mode 100644
index 8849008ff630db0400a6d7d98e789099e5fbb5d9..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/main.cu
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/fast-sweeping/main.h b/src/TNL/Legacy/fast-sweeping/main.h
deleted file mode 100644
index e5ac15fede2281abbd31320985de93671d63d178..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/main.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          main.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-
-#include "MainBuildConfig.h"
-	//for HOST versions:
-#include "tnlFastSweeping.h"
-	//for DEVICE versions:
-//#include "tnlFastSweeping_CUDA.h"
-#include "fastSweepingConfig.h"
-#include <solvers/tnlBuildConfigTags.h>
-
-#include <mesh/tnlGrid.h>
-#include <core/tnlDevice.h>
-#include <time.h>
-#include <ctime>
-
-typedef MainBuildConfig BuildConfig;
-
-int main( int argc, char* argv[] )
-{
-	time_t start;
-	time_t stop;
-	time(&start);
-	std::clock_t start2= std::clock();
-   Config::ParameterContainer parameters;
-   tnlConfigDescription configDescription;
-   fastSweepingConfig< BuildConfig >::configSetup( configDescription );
-
-   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
-      return false;
-
-   const int& dim = parameters.getParameter< int >( "dim" );
-
-   if(dim == 2)
-   {
-		tnlFastSweeping<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver;
-		if(!solver.init(parameters))
-	   {
-			cerr << "Solver failed to initialize." <<std::endl;
-			return EXIT_FAILURE;
-	   }
-		TNL_CHECK_CUDA_DEVICE;
-	  std::cout << "-------------------------------------------------------------" <<std::endl;
-	  std::cout << "Starting solver..." <<std::endl;
-	   solver.run();
-   }
-   else if(dim == 3)
-   {
-		tnlFastSweeping<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver;
-		if(!solver.init(parameters))
-	   {
-			cerr << "Solver failed to initialize." <<std::endl;
-			return EXIT_FAILURE;
-	   }
-		TNL_CHECK_CUDA_DEVICE;
-	  std::cout << "-------------------------------------------------------------" <<std::endl;
-	  std::cout << "Starting solver..." <<std::endl;
-	   solver.run();
-   }
-   else
-   {
-	  std::cerr << "Unsupported number of dimensions: " << dim << "!" <<std::endl;
-	   return EXIT_FAILURE;
-   }
-
-
-   time(&stop);
-  std::cout << "Solver stopped..." <<std::endl;
-  std::cout <<std::endl;
-  std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl;
-   return EXIT_SUCCESS;
-}
-
-
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h
deleted file mode 100644
index 96d26db7b5a2077d8e2199292f0e888b0171a5c2..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING_H_
-#define TNLFASTSWEEPING_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <functions/tnlMeshFunction.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-#include <ctime>
-#ifdef HAVE_OPENMP
-#include <omp.h>
-#endif
-
-
-
-
-template< typename Mesh,
-		  typename Real,
-		  typename Index >
-class tnlFastSweeping
-{};
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-
-	tnlFastSweeping();
-
-	static String getType();
-	bool init( const Config::ParameterContainer& parameters );
-
-	bool initGrid();
-	bool run();
-
-	//for single core version use this implementation:
-	void updateValue(const Index i, const Index j);
-	//for parallel version use this one instead:
-//	void updateValue(const Index i, const Index j, DofVectorType* grid);
-
-
-	void setupSquare1000(Index i, Index j);
-	void setupSquare1100(Index i, Index j);
-	void setupSquare1010(Index i, Index j);
-	void setupSquare1001(Index i, Index j);
-	void setupSquare1110(Index i, Index j);
-	void setupSquare1101(Index i, Index j);
-	void setupSquare1011(Index i, Index j);
-	void setupSquare1111(Index i, Index j);
-	void setupSquare0000(Index i, Index j);
-	void setupSquare0100(Index i, Index j);
-	void setupSquare0010(Index i, Index j);
-	void setupSquare0001(Index i, Index j);
-	void setupSquare0110(Index i, Index j);
-	void setupSquare0101(Index i, Index j);
-	void setupSquare0011(Index i, Index j);
-	void setupSquare0111(Index i, Index j);
-
-	Real fabsMin(const Real x, const Real y);
-
-
-protected:
-
-	MeshType Mesh;
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector, dofVector2;
-	DofVectorType data;
-
-	RealType h;
-
-	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
-
-
-#ifdef HAVE_OPENMP
-//	omp_lock_t* gridLock;
-#endif
-
-
-};
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-	tnlFastSweeping();
-
-	static String getType();
-	bool init( const Config::ParameterContainer& parameters );
-
-	bool initGrid();
-	bool run();
-
-	//for single core version use this implementation:
-	void updateValue(const Index i, const Index j, const Index k);
-	//for parallel version use this one instead:
-//	void updateValue(const Index i, const Index j, DofVectorType* grid);
-
-	Real fabsMin(const Real x, const Real y);
-
-
-protected:
-
-	MeshType Mesh;
-
-	bool exactInput;
-
-
-	tnlMeshFunction<MeshType> dofVector, dofVector2;
-	DofVectorType data;
-
-	RealType h;
-
-	tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity;
-
-#ifdef HAVE_OPENMP
-//	omp_lock_t* gridLock;
-#endif
-
-
-};
-
-
-	//for single core version use this implementation:
-#include "tnlFastSweeping2D_impl.h"
-	//for parallel version use this one instead:
-// #include "tnlFastSweeping2D_openMP_impl.h"
-
-#include "tnlFastSweeping3D_impl.h"
-
-#endif /* TNLFASTSWEEPING_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h
deleted file mode 100644
index bc1da169c01466a69c00b24e450e5eba09aacd1a..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h
+++ /dev/null
@@ -1,522 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_CUDA_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.getSpaceSteps().x();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);
-
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-//
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//
-//	dofVector.save("u-00001.tnl");
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(32, 32);
-	dim3 numBlocks(n/32 + 1 ,n/32 +1);
-
-	for(int i = 2*n - 1; i > -1; i--)
-	{
-		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i);
-		cudaDeviceSynchronize();
-	}
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	for(int i = 0; i < 2*n ; i++)
-	{
-		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i);
-		cudaDeviceSynchronize();
-	}
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	for(int i = 0; i < 2*n ; i++)
-	{
-		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i);
-		cudaDeviceSynchronize();
-	}
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	for(int i = 2*n - 1; i > -1; i--)
-	{
-		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,i);
-		cudaDeviceSynchronize();
-	}
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-	Real value = cudaDofVector[index];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)];
-	else
-	{
-		a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] );
-	}
-
-	if( j == 0 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)];
-	else
-	{
-		b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] );
-	}
-
-
-	if(abs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-	cudaDofVector[index]  = fabsMin(value, tmp);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	int gid = Mesh.getCellIndex(CoordinatesType(gx,gy));
-
-	int total = blockDim.x*gridDim.x;
-
-
-
-	Real tmp = 0.0;
-	int flag = 0;
-	counter = 0;
-	tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-
-
-	if(!exactInput)
-	{
-		cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]);
-	}
-	__threadfence();
-//	printf("-----------------------------------------------------------------------------------\n");
-
-	__threadfence();
-
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1)
-	{
-		if(gy > 0 && gy < Mesh.getDimensions().y()-1)
-		{
-
-			Index j = gy;
-			Index i = gx;
-//			 tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-			if(tmp == 0.0)
-			{}
-			else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-			{}
-			else
-				flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-		}
-	}
-
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-//	printf("****************************************************************\n");
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0)
-	{
-//		printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-		Index j = 0;
-		Index i = gx;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1)
-	{
-		Index i = gx;
-		Index j = Mesh.getDimensions().y() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0)
-	{
-		Index j = gy;
-		Index i = 0;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-//	printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1  && gx == Mesh.getDimensions().x() - 1)
-	{
-		Index j = gy;
-		Index i = Mesh.getDimensions().x() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("##################################################################################################\n");
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == 0)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-//	printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n");
-	if(gx == 0 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == 0 &&
-	   gy == 0)
-	{
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-
-	__threadfence();
-
-	if(flag==1)
-		cudaDofVector[gid] =  tmp*3;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	Real fy = abs(y);
-
-	Real tmpMin = Min(fx,fy);
-
-	if(tmpMin == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
-		return;
-	int total = solver->Mesh.getDimensions().x();
-	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
-	int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
-
-	int id1 = gx+gy;
-	int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
-
-	/*---------------------------------------------------------------------------------------------------------------------------*/
-	if(sweep == 1)
-//	for(int i = 0; i < 2*total - 1; i++)
-	{
-		if(id1 == i)
-		{
-			solver->updateValue(gx,gy);
-			return;
-		}
-
-	}
-	/*---------------------------------------------------------------------------------------------------------------------------*/
-	else if(sweep == 2)
-//	for(int i = 0; i < 2*total - 1; i++)
-	{
-		if(id2 == i)
-		{
-			solver->updateValue(gx,gy);
-			return;
-		}
-	}
-	/*---------------------------------------------------------------------------------------------------------------------------*/
-	else if(sweep == 3)
-//	for(int i = 2*total - 2; i > -1; i--)
-	{
-		if(id1 == i)
-		{
-			solver->updateValue(gx,gy);
-			return;
-		}
-	}
-	/*---------------------------------------------------------------------------------------------------------------------------*/
-	else if(sweep == 4)
-//	for(int i = 2*total - 2; i > -1; i--)
-	{
-		if(id2 == i)
-		{
-			solver->updateValue(gx,gy);
-			return;
-		}
-	}
-	/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-
-
-}
-
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-#endif
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h
deleted file mode 100644
index 3ad5b7944f839f794f695240e7abba59e23d16b4..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h
+++ /dev/null
@@ -1,588 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_CUDA_v2_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.getSpaceSteps().x();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);
-
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-//
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//
-//	dofVector.save("u-00001.tnl");
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(27, 27);
-	dim3 numBlocks(1 ,1);
-
-//	for(int i = 2*n - 1; i > -1; i--)
-	{
-		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,0);
-		cudaDeviceSynchronize();
-	}
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-////	for(int i = 0; i < 2*n ; i++)
-//	{
-//		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,0);
-//		cudaDeviceSynchronize();
-//	}
-//	cudaDeviceSynchronize();
-//	TNL_CHECK_CUDA_DEVICE;
-////	for(int i = 0; i < 2*n ; i++)
-//	{
-//		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,0);
-//		cudaDeviceSynchronize();
-//	}
-//	cudaDeviceSynchronize();
-//	TNL_CHECK_CUDA_DEVICE;
-////	for(int i = 2*n - 1; i > -1; i--)
-//	{
-//		runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,0);
-//		cudaDeviceSynchronize();
-//	}
-//
-//	cudaDeviceSynchronize();
-//	TNL_CHECK_CUDA_DEVICE;
-
-	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-	Real value = cudaDofVector[index];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)];
-	else
-	{
-		a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] );
-	}
-
-	if( j == 0 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)];
-	else
-	{
-		b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] );
-	}
-
-
-	if(abs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-	cudaDofVector[index]  = fabsMin(value, tmp);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	int gid = Mesh.getCellIndex(CoordinatesType(gx,gy));
-
-	int total = blockDim.x*gridDim.x;
-
-
-
-	Real tmp = 0.0;
-	int flag = 0;
-	counter = 0;
-	tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-
-
-	if(!exactInput)
-	{
-		cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]);
-	}
-	__threadfence();
-//	printf("-----------------------------------------------------------------------------------\n");
-
-	__threadfence();
-
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1)
-	{
-		if(gy > 0 && gy < Mesh.getDimensions().y()-1)
-		{
-
-			Index j = gy;
-			Index i = gx;
-//			 tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-			if(tmp == 0.0)
-			{}
-			else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-			{}
-			else
-				flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-		}
-	}
-
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-//	printf("****************************************************************\n");
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0)
-	{
-//		printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-		Index j = 0;
-		Index i = gx;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1)
-	{
-		Index i = gx;
-		Index j = Mesh.getDimensions().y() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0)
-	{
-		Index j = gy;
-		Index i = 0;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-//	printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1  && gx == Mesh.getDimensions().x() - 1)
-	{
-		Index j = gy;
-		Index i = Mesh.getDimensions().x() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("##################################################################################################\n");
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == 0)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-//	printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n");
-	if(gx == 0 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == 0 &&
-	   gy == 0)
-	{
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-
-	__threadfence();
-
-	if(flag==1)
-		cudaDofVector[gid] =  tmp*3;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-
-	Real tmpMin = Min(fx,abs(y));
-
-	if(tmpMin == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-{
-
-	//int gx = threadIdx.x;
-	//int gy = threadIdx.y;
-	int id1,id2;
-	int nx = solver->Mesh.getDimensions().x()+ threadIdx.x;
-	int ny = solver->Mesh.getDimensions().y()+ threadIdx.y;
-
-	int blockCount = solver->Mesh.getDimensions().x()/blockDim.x + 1;
-
-	for(int gy = threadIdx.y; gy < ny;gy+=blockDim.y)
-	{
-		for(int gx = threadIdx.x; gx < nx;gx+=blockDim.x)
-		{
-//			if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1)
-			{
-				id1 = threadIdx.x+threadIdx.y;
-
-				for(int l = 0; l < 2*blockDim.x - 1; l++)
-				{
-					if(id1 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-					}
-					__syncthreads();
-				}
-
-			}
-			//gx+=blockDim.x;
-			//__syncthreads();
-		}
-		//gx = threadIdx.x;
-		//gy+=blockDim.y;
-		//__syncthreads();
-	}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-//	gx = blockDim.x*(blockCount-1) + threadIdx.x;
-//	gy = threadIdx.y;
-	for(int gy = threadIdx.y; gy < ny;gy+=blockDim.y)
-	{
-		for(int gx = blockDim.x*(blockCount-1) + threadIdx.x; gx >- 1;gx-=blockDim.x)
-		{
-//			if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1)
-			{
-				id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-				for(int l = 0; l < 2*blockDim.x - 1; l++)
-				{
-					if(id2 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-					}
-					__syncthreads();
-				}
-			}
-			//gx-=blockDim.x;
-			//__syncthreads();
-		}
-		//gx = blockDim.x*(blockCount-1) + threadIdx.x;
-		//gy+=blockDim.y;
-		//__syncthreads();
-	}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-//	gx = blockDim.x*(blockCount-1) + threadIdx.x;
-//	gy = blockDim.x*(blockCount-1) + threadIdx.y;
-	for(int gy = blockDim.x*(blockCount-1) +threadIdx.y; gy >- 1;gy-=blockDim.y)
-	{
-		for(int gx = blockDim.x*(blockCount-1) + threadIdx.x; gx >- 1;gx-=blockDim.x)
-		{
-//			if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1)
-			{
-				id1 = threadIdx.x+threadIdx.y;
-
-				for(int l = 2*blockDim.x - 2; l > -1; l--)
-				{
-					if(id1 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-					}
-					__syncthreads();
-				}
-			}
-			//gx-=blockDim.x;
-			//__syncthreads();
-		}
-		//gx = blockDim.x*(blockCount-1) + threadIdx.x;
-		//gy-=blockDim.y;
-		//__syncthreads();
-	}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-	//gx = threadIdx.x;
-	//gy = blockDim.x*(blockCount-1) +threadIdx.y;
-	for(int gy = blockDim.x*(blockCount-1) +threadIdx.y; gy >- 1;gy-=blockDim.y)
-	{
-		for(int gx = threadIdx.x; gx < nx;gx+=blockDim.x)
-		{
-//			if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1)
-			{
-				id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-				for(int l = 2*blockDim.x - 2; l > -1; l--)
-				{
-					if(id2 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-					}
-					__syncthreads();
-				}
-			}
-			//gx+=blockDim.x;
-			//__syncthreads();
-		}
-		//gx = threadIdx.x;
-		//gy-=blockDim.y;
-		///__syncthreads();
-	}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-
-
-
-}
-
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-#endif
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h
deleted file mode 100644
index ff36d3f8e0a73e5a0987b06f81be2650b277fa25..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h
+++ /dev/null
@@ -1,920 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_CUDA_v3_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-
-
-
-__device__ double atomicSet(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong(val ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.getSpaceSteps().x();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);
-
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-//
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//
-//	dofVector.save("u-00001.tnl");
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 +1 ,n/16 +1);
-	int m =n/16 +1;
-
-	for(int i = 0; i < 2*m -1; i++)
-	{
-		runCUDA<15><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i);
-		//cudaDeviceSynchronize();
-	}
-//	cudaDeviceSynchronize();
-//	TNL_CHECK_CUDA_DEVICE;
-//	for(int i = 0; i < 2*m -1; i++)
-//	{
-//		runCUDA<2><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i);
-//		cudaDeviceSynchronize();
-//	}
-//	cudaDeviceSynchronize();
-//	TNL_CHECK_CUDA_DEVICE;
-//	for(int i = 0; i < 2*m -1; i++)
-//	{
-//		runCUDA<4><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i);
-//		cudaDeviceSynchronize();
-//	}
-//	cudaDeviceSynchronize();
-//	TNL_CHECK_CUDA_DEVICE;
-//	for(int i = 0; i < 2*m -1; i++)
-//	{
-//		runCUDA<8><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,8,i);
-//		cudaDeviceSynchronize();
-//	}
-
-
-
-
-//	for(int i = 0; i < (2*m -1)/4 -1; i++)
-//	{
-//		runCUDA<15><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,15,i);//all
-//		cudaDeviceSynchronize();
-//	}
-//	for(int i = (2*m -1)/4 -1; i < (2*m -1)/2 -1; i++)
-//	{
-//		runCUDA<5><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,5,i); //two
-//		cudaDeviceSynchronize();
-//		runCUDA<10><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,10,i); //two
-//		cudaDeviceSynchronize();
-//	}
-//	for(int i = (2*m -1)/2 -1; i < (2*m -1)/2 +1; i++)
-//	{
-//		runCUDA<1><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i); //separate
-//		cudaDeviceSynchronize();
-//		runCUDA<2><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i); //separate
-//		cudaDeviceSynchronize();
-//		runCUDA<4><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i); //separate
-//		cudaDeviceSynchronize();
-//		runCUDA<8><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,8,i); //separate
-//		cudaDeviceSynchronize();
-//	}
-//	for(int i = (2*m -1)/2 +1; i < (2*m -1/4)*3 +1; i++)
-//	{
-//		runCUDA<5><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,5,i); //two
-//		cudaDeviceSynchronize();
-//		runCUDA<10><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,10,i); //two
-//		cudaDeviceSynchronize();
-//	}
-//	for(int i = (2*m -1/4)*3 +1; i < 2*m -1; i++)
-//	{
-//		runCUDA<15><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,15,i);//all
-//		cudaDeviceSynchronize();
-//	}
-cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-	Real value = cudaDofVector[index];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)];
-	else
-	{
-		a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] );
-	}
-
-	if( j == 0 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)];
-	else
-	{
-		b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] );
-	}
-
-
-	if(abs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-	atomicSet(&cudaDofVector[index],fabsMin(value, tmp));
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	int gid = Mesh.getCellIndex(CoordinatesType(gx,gy));
-
-	int total = blockDim.x*gridDim.x;
-
-
-
-	Real tmp = 0.0;
-	int flag = 0;
-	counter = 0;
-	tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-
-
-	if(!exactInput)
-	{
-		cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]);
-	}
-	__threadfence();
-//	printf("-----------------------------------------------------------------------------------\n");
-
-	__threadfence();
-
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1)
-	{
-		if(gy > 0 && gy < Mesh.getDimensions().y()-1)
-		{
-
-			Index j = gy;
-			Index i = gx;
-//			 tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-			if(tmp == 0.0)
-			{}
-			else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-			{}
-			else
-				flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-		}
-	}
-
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-//	printf("****************************************************************\n");
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0)
-	{
-//		printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-		Index j = 0;
-		Index i = gx;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1)
-	{
-		Index i = gx;
-		Index j = Mesh.getDimensions().y() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0)
-	{
-		Index j = gy;
-		Index i = 0;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-//	printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1  && gx == Mesh.getDimensions().x() - 1)
-	{
-		Index j = gy;
-		Index i = Mesh.getDimensions().x() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("##################################################################################################\n");
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == 0)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-//	printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n");
-	if(gx == 0 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == 0 &&
-	   gy == 0)
-	{
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-
-	__threadfence();
-
-	if(flag==1)
-		cudaDofVector[gid] =  tmp*3;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-//	Real fx = abs(x);
-//
-//	Real tmpMin = Min(fx,abs(y));
-
-	if(abs(y) > abs(x))
-		return x;
-	else
-		return y;
-
-
-}
-
-
-template<>
-__global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-{
-
-	if(blockIdx.x+blockIdx.y == k)
-	{
-		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-		int id1 = threadIdx.x+threadIdx.y;
-
-						for(int l = 0; l < 2*blockDim.x - 1; l++)
-						{
-							if(id1 == l)
-							{
-								if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-								solver->updateValue(gx,gy);
-							}
-							__syncthreads();
-						}
-
-	}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-}
-	template<>
-	__global__ void runCUDA<2>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-	{
-	if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k)
-	{
-		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-		int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-				for(int l = 0; l < 2*blockDim.x - 1; l++)
-				{
-					if(id2 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-					}
-					__syncthreads();
-				}
-
-	}
-	}			/*---------------------------------------------------------------------------------------------------------------------------*/
-	template<>
-	__global__ void runCUDA<4>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-	{
-	if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2)
-		{
-		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-		int id1 = threadIdx.x+threadIdx.y;
-
-				for(int l = 2*blockDim.x - 2; l > -1; l--)
-				{
-					if(id1 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-						return;
-					}
-					__syncthreads();
-				}
-
-		}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	}
-
-	template<>
-	__global__ void runCUDA<8>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-	{
-	if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2)
-		{
-		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-		int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-				for(int l = 2*blockDim.x - 2; l > -1; l--)
-				{
-					if(id2 == l)
-					{
-						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-						solver->updateValue(gx,gy);
-						return;
-					}
-					__syncthreads();
-				}
-
-		}
-			/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-
-
-
-}
-
-
-	template<>
-		__global__ void runCUDA<5>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-		{
-
-			if(blockIdx.x+blockIdx.y == k)
-			{
-				int gx = threadIdx.x + blockDim.x*blockIdx.x;
-				int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-				int id1 = threadIdx.x+threadIdx.y;
-
-								for(int l = 0; l < 2*blockDim.x - 1; l++)
-								{
-									if(id1 == l)
-									{
-										if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-										solver->updateValue(gx,gy);
-										return;
-									}
-									__syncthreads();
-								}
-
-			}
-			else if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2)
-				{
-				int gx = threadIdx.x + blockDim.x*blockIdx.x;
-				int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-				int id1 = threadIdx.x+threadIdx.y;
-
-						for(int l = 2*blockDim.x - 2; l > -1; l--)
-						{
-							if(id1 == l)
-							{
-								if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-								solver->updateValue(gx,gy);
-								return;
-							}
-							__syncthreads();
-						}
-
-				}
-		}
-
-
-	template<>
-		__global__ void runCUDA<10>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-		{
-			if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k)
-			{
-				int gx = threadIdx.x + blockDim.x*blockIdx.x;
-				int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-				int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-						for(int l = 0; l < 2*blockDim.x - 1; l++)
-						{
-							if(id2 == l)
-							{
-								if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-								solver->updateValue(gx,gy);
-								return;
-							}
-							__syncthreads();
-						}
-
-			}
-
-			else if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2)
-				{
-				int gx = threadIdx.x + blockDim.x*blockIdx.x;
-				int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-				int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-						for(int l = 2*blockDim.x - 2; l > -1; l--)
-						{
-							if(id2 == l)
-							{
-								if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-								solver->updateValue(gx,gy);
-								return;
-							}
-							__syncthreads();
-						}
-
-				}
-
-		}
-
-
-
-	template<>
-	__global__ void runCUDA<15>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-	{
-
-		if(blockIdx.x+blockIdx.y == k)
-		{
-			int gx = threadIdx.x + blockDim.x*blockIdx.x;
-			int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-			int id1 = threadIdx.x+threadIdx.y;
-
-							for(int l = 0; l < 2*blockDim.x - 1; l++)
-							{
-								if(id1 == l)
-								{
-									if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-									solver->updateValue(gx,gy);
-									return;
-								}
-								__syncthreads();
-							}
-
-		}
-				/*---------------------------------------------------------------------------------------------------------------------------*/
-
-		if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k)
-		{
-			int gx = threadIdx.x + blockDim.x*blockIdx.x;
-			int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-			int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-					for(int l = 0; l < 2*blockDim.x - 1; l++)
-					{
-						if(id2 == l)
-						{
-							if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-							solver->updateValue(gx,gy);
-							return;
-						}
-						__syncthreads();
-					}
-
-		}
-				/*---------------------------------------------------------------------------------------------------------------------------*/
-
-		if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2)
-			{
-			int gx = threadIdx.x + blockDim.x*blockIdx.x;
-			int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-			int id1 = threadIdx.x+threadIdx.y;
-
-					for(int l = 2*blockDim.x - 2; l > -1; l--)
-					{
-						if(id1 == l)
-						{
-							if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-							solver->updateValue(gx,gy);
-							return;
-						}
-						__syncthreads();
-					}
-
-			}
-				/*---------------------------------------------------------------------------------------------------------------------------*/
-
-		if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2)
-			{
-			int gx = threadIdx.x + blockDim.x*blockIdx.x;
-			int gy = threadIdx.y + blockDim.y*blockIdx.y;
-
-			int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-
-					for(int l = 2*blockDim.x - 2; l > -1; l--)
-					{
-						if(id2 == l)
-						{
-							if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-							solver->updateValue(gx,gy);
-							return;
-						}
-						__syncthreads();
-					}
-
-			}
-				/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-
-
-
-	}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-#endif
-
-
-
-
-
-
-//__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k)
-//{
-//
-//	if(sweep==1 && blockIdx.x+blockIdx.y == k)
-//	{
-//		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-//		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-//
-//		int id1 = threadIdx.x+threadIdx.y;
-//
-//						for(int l = 0; l < 2*blockDim.x - 1; l++)
-//						{
-//							if(id1 == l)
-//							{
-//								if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-//								solver->updateValue(gx,gy);
-//							}
-//							__syncthreads();
-//						}
-//
-//	}
-//			/*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	else if(sweep==2 && (gridDim.x - blockIdx.x - 1)+blockIdx.y == k)
-//	{
-//		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-//		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-//
-//		int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-//
-//				for(int l = 0; l < 2*blockDim.x - 1; l++)
-//				{
-//					if(id2 == l)
-//					{
-//						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-//						solver->updateValue(gx,gy);
-//					}
-//					__syncthreads();
-//				}
-//
-//	}
-//			/*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	else if(sweep==4 && blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2)
-//		{
-//		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-//		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-//
-//		int id1 = threadIdx.x+threadIdx.y;
-//
-//				for(int l = 2*blockDim.x - 2; l > -1; l--)
-//				{
-//					if(id1 == l)
-//					{
-//						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-//						solver->updateValue(gx,gy);
-//						return;
-//					}
-//					__syncthreads();
-//				}
-//
-//		}
-//			/*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	else if(sweep==8 && (gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2)
-//		{
-//		int gx = threadIdx.x + blockDim.x*blockIdx.x;
-//		int gy = threadIdx.y + blockDim.y*blockIdx.y;
-//
-//		int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y;
-//
-//				for(int l = 2*blockDim.x - 2; l > -1; l--)
-//				{
-//					if(id2 == l)
-//					{
-//						if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/)
-//						solver->updateValue(gx,gy);
-//						return;
-//					}
-//					__syncthreads();
-//				}
-//
-//		}
-//			/*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//
-//
-//
-//
-//}
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h
deleted file mode 100644
index e0a9697c2e5ea4097232b4d4a8f3c6da6fa41e50..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h
+++ /dev/null
@@ -1,1003 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_CUDA_v4_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-__device__
-double fabsMin( double x, double y)
-{
-	double fx = abs(x);
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-}
-
-__device__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping()
-:dofVector(Mesh)
-{
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	//Entity.refresh();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);
-
-
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(1, 1024);
-	dim3 numBlocks(4,1);
-
-
-	runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0);
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	//data.setLike(dofVector.getData());
-	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	//data.save("u-00001.tnl");
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	Real value = cudaDofVector2[Entity.getIndex()];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-	else
-	{
-		a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-				 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-	}
-
-	if( j == 0 )
-		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-	else
-	{
-		b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-	}
-
-
-	if(abs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-//	cudaDofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
-	atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int i = threadIdx.x + blockDim.x*blockIdx.x;
-	int j = blockDim.y*blockIdx.y + threadIdx.y;
-
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	int gid = Entity.getIndex();
-
-	cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]);
-//
-//	if(abs(cudaDofVector[gid]) < 1.01*h)
-//		cudaDofVector2[gid] = cudaDofVector[gid];
-
-
-
-
-
-	if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() && !exactInput)
-	{
-		if(cudaDofVector[Entity.getIndex()] > 0)
-		{
-			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-			{
-				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare1111(i,j);
-					else
-						setupSquare1110(i,j);
-				}
-				else
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare1101(i,j);
-					else
-						setupSquare1100(i,j);
-				}
-			}
-			else
-			{
-				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare1011(i,j);
-					else
-						setupSquare1010(i,j);
-				}
-				else
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare1001(i,j);
-					else
-						setupSquare1000(i,j);
-				}
-			}
-		}
-		else
-		{
-			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-			{
-				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare0111(i,j);
-					else
-						setupSquare0110(i,j);
-				}
-				else
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare0101(i,j);
-					else
-						setupSquare0100(i,j);
-				}
-			}
-			else
-			{
-				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare0011(i,j);
-					else
-						setupSquare0010(i,j);
-				}
-				else
-				{
-					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-						setupSquare0001(i,j);
-					else
-						setupSquare0000(i,j);
-				}
-			}
-		}
-
-	}
-	if(exactInput)
-	{
-		if(abs(cudaDofVector[gid]) < 1.5*h)
-			cudaDofVector2[gid] = cudaDofVector[gid];
-	}
-
-
-	return true;
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	//Real fy = abs(y);
-
-	//Real tmpMin = Min(fx,abs(y));
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-	int gx = 0;
-	int gy = threadIdx.y;
-	//if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
-	//	return;
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
-	//int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
-
-	//int id1 = gx+gy;
-	//int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
-
-	if(blockIdx.x==0)
-	{
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==1)
-	{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==2)
-	{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==3)
-	{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-
-
-
-
-
-}
-
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-
-
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-//	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-//	Entity.setCoordinates(CoordinatesType(i,j));
-//	Entity.refresh();
-//	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-//	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
-//	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-//	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-//	Entity.setCoordinates(CoordinatesType(i,j));
-//	Entity.refresh();
-//	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-//	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
-//	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=INT_MAX;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=-INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=INT_MAX;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-INT_MAX;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=0.5*h;	//fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b = 1.0;
-	c = -be;
-	s = h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=-0.5*h;	//fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=0.5*h;	//fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=-0.5*h;	//fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=-0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=0.5*h;	//fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-}
-#endif
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h
deleted file mode 100644
index 1591bb6137007b88e636a83bb113030ecd529f9c..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h
+++ /dev/null
@@ -1,697 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_CUDA_v5_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-__device__
-double fabsMin( double x, double y)
-{
-	double fx = abs(x);
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-}
-
-__device__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.getSpaceSteps().x();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);
-
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-//
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-//	{
-//		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-//		{
-//			updateValue(i,j);
-//		}
-//	}
-//
-///*---------------------------------------------------------------------------------------------------------------------------*/
-//
-//
-//	dofVector.save("u-00001.tnl");
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(1, 512);
-	dim3 numBlocks(4,1);
-
-
-	runCUDA<<<numBlocks,threadsPerBlock,3*(512+1)*sizeof(double)>>>(this->cudaSolver,0,0);
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-	Real value = cudaDofVector[index];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)];
-	else
-	{
-		a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] );
-	}
-
-	if( j == 0 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)];
-	else
-	{
-		b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)],
-				 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] );
-	}
-
-
-	if(abs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-	cudaDofVector[index]  = fabsMin(value, tmp);
-
-}
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, double** sharedMem, int k3)
-{
-	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-	Real value = sharedMem[k3+1][threadIdx.y];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = sharedMem[k3][threadIdx.y];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = sharedMem[k3+2][threadIdx.y];
-	else
-	{
-		a = fabsMin( sharedMem[k3][threadIdx.y],
-				sharedMem[k3+2][threadIdx.y] );
-	}
-
-	if( j == 0 )
-		b = sharedMem[k3][threadIdx.y+1];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = sharedMem[k3+2][threadIdx.y-1];
-	else
-	{
-		b = fabsMin( sharedMem[k3][threadIdx.y+1],
-				sharedMem[k3+2][threadIdx.y-1] );
-	}
-
-
-	if(abs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-//	sharedMem[k3+1][threadIdx.y] = this->fabsMin(value, tmp);
-//	atomicFabsMin(&(cudaDofVector[index]), tmp);
-	cudaDofVector[index]  = tmp;
-	this->fabsMin(value, tmp);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	int gid = Mesh.getCellIndex(CoordinatesType(gx,gy));
-
-	int total = blockDim.x*gridDim.x;
-
-
-
-	Real tmp = 0.0;
-	int flag = 0;
-	counter = 0;
-	tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-
-
-	if(!exactInput)
-	{
-		cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]);
-	}
-	__threadfence();
-//	printf("-----------------------------------------------------------------------------------\n");
-
-	__threadfence();
-
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1)
-	{
-		if(gy > 0 && gy < Mesh.getDimensions().y()-1)
-		{
-
-			Index j = gy;
-			Index i = gx;
-//			 tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-			if(tmp == 0.0)
-			{}
-			else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-					cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-			{}
-			else
-				flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-		}
-	}
-
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-//	printf("****************************************************************\n");
-//	printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0)
-	{
-//		printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid);
-		Index j = 0;
-		Index i = gx;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
-	if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1)
-	{
-		Index i = gx;
-		Index j = Mesh.getDimensions().y() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0)
-	{
-		Index j = gy;
-		Index i = 0;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-//	printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
-	if(gy > 0 && gy < Mesh.getDimensions().y()-1  && gx == Mesh.getDimensions().x() - 1)
-	{
-		Index j = gy;
-		Index i = Mesh.getDimensions().x() - 1;
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-
-
-		if(tmp == 0.0)
-		{}
-		else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-		{}
-		else
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-	}
-
-//	printf("##################################################################################################\n");
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == Mesh.getDimensions().x() - 1 &&
-	   gy == 0)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-//	printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n");
-	if(gx == 0 &&
-	   gy == Mesh.getDimensions().y() - 1)
-	{
-
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-	if(gx == 0 &&
-	   gy == 0)
-	{
-//		tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]);
-		if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 &&
-				cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0)
-
-			flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX;
-	}
-
-	__threadfence();
-
-	if(flag==1)
-		cudaDofVector[gid] =  tmp*3;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	//Real fy = abs(y);
-
-	//Real tmpMin = Min(fx,abs(y));
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-	extern __shared__ double u[];
-	double* sharedMem[5];
-	sharedMem[0] = u;
-	sharedMem[1] = &(u[blockDim.y+1]);
-	sharedMem[2] = &(sharedMem[1][blockDim.y+1]);
-	sharedMem[3] = sharedMem[1];
-	sharedMem[4] = sharedMem[2];
-
-	int gx = 0;
-	int gy = threadIdx.y;
-	//if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
-	//	return;
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
-	//int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
-
-	//int id1 = gx+gy;
-	//int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
-
-
-	if(blockIdx.x==0)
-	{
-		if(threadIdx.y==0)
-			sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,0))];
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				int k3=k%3;
-
-				if(threadIdx.y==0)
-				{
-					if(gx==n-1)
-						sharedMem[k3][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,gy+blockDim.y))];
-					else
-						sharedMem[k3][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))];
-				}
-//				else
-//					solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3+2][threadIdx.y-1];
-
-				if(gy<n-1)
-					sharedMem[k3][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))];
-
-				solver->updateValue(gx,gy,sharedMem,k3);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-//	else if(blockIdx.x==1)
-//	{
-//		gx=n-1;
-//		gy=threadIdx.y;
-//
-//		if(threadIdx.y==0)
-//					sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,0))];
-//
-//		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-//		{
-//			if(threadIdx.y  < k+1 && gy < n)
-//			{
-//				int k3=k%3;
-//
-//				if(threadIdx.y==0)
-//					if(gx==0)
-//						sharedMem[k3+2][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,gy+blockDim.y))];
-//					else
-//						sharedMem[k3+2][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx-1,gy))];
-//				else
-//					solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3][threadIdx.y-1];
-//
-//				if(gy<n-1)
-//					sharedMem[k3+2][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))];
-//
-//
-//				solver->updateValue(gx,gy,sharedMem,k3);
-//				gx--;
-//				if(gx==-1)
-//				{
-//					gx=n-1;
-//					gy+=blockDim.y;
-//				}
-//			}
-//
-//
-//			__syncthreads();
-//		}
-//	}
-//	else if(blockIdx.x==2)
-//	{
-//		gx=0;
-//		gy=n-blockDim.y-1+threadIdx.y;
-//
-//		if(threadIdx.y==0)
-//					sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,n-1))];
-//
-//		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-//		{
-//			if(blockDim.y-threadIdx.y  < k+1 && gy > -1)
-//			{
-//				int k3=k%3;
-//
-//				if(threadIdx.y==blockDim.y-1)
-//					if(gx==n-1)
-//						sharedMem[k3][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,gy-blockDim.y))];
-//					else
-//						sharedMem[k3][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))];
-//				else
-//					solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3+2][threadIdx.y-1];
-//
-//				if(gy<n-1)
-//					sharedMem[k3][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))];
-//
-//
-//				solver->updateValue(gx,gy,sharedMem,k3);
-//				gx++;
-//				if(gx==n)
-//				{
-//					gx=0;
-//					gy-=blockDim.y;
-//				}
-//			}
-//
-//
-//			__syncthreads();
-//		}
-//	}
-//	else if(blockIdx.x==3)
-//	{
-//		gx=n-1;
-//		gy=n-blockDim.y-1+threadIdx.y;
-//
-//		if(threadIdx.y==0)
-//					sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,n-1))];
-//
-//
-//		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-//		{
-//			if(blockDim.y-threadIdx.y  < k+1 && gy > -1)
-//			{
-//				int k3=k%3;
-//
-//				if(threadIdx.y==blockDim.y-1)
-//					if(gx==n-1)
-//						sharedMem[k3+2][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,gy-blockDim.y))];
-//					else
-//						sharedMem[k3+2][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))];
-//				else
-//					solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3][threadIdx.y-1];
-//
-//				if(gy<n-1)
-//					sharedMem[k3+2][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))];
-//
-//
-//				solver->updateValue(gx,gy,sharedMem,k3);
-//				gx--;
-//				if(gx==-1)
-//				{
-//					gx=n-1;
-//					gy-=blockDim.y;
-//				}
-//			}
-//
-//
-//			__syncthreads();
-//		}
-//	}
-
-
-
-
-}
-
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-#endif
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h
deleted file mode 100644
index c4ce8fe6b2ee80131c0bc2a945bc8cef4377f106..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h
+++ /dev/null
@@ -1,927 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping()
-:Entity(Mesh),
- dofVector(Mesh),
- dofVector2(Mesh)
-{
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-	dofVector2.load(initialCondition);
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	Entity.refresh();
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-	cout << "a" <<std::endl;
-	return initGrid();
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
-	{
-		dofVector2[i]=INT_MAX*sign(dofVector[i]);
-	}
-
-	for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++)
-	{
-		for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++)
-			{
-			this->Entity.setCoordinates(CoordinatesType(i,j));
-			this->Entity.refresh();
-			neighborEntities.refresh(Mesh,Entity.getIndex());
-
-				if(dofVector[this->Entity.getIndex()] > 0)
-				{
-					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1111(i,j);
-							else
-								setupSquare1110(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1101(i,j);
-							else
-								setupSquare1100(i,j);
-						}
-					}
-					else
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1011(i,j);
-							else
-								setupSquare1010(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1001(i,j);
-							else
-								setupSquare1000(i,j);
-						}
-					}
-				}
-				else
-				{
-					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0111(i,j);
-							else
-								setupSquare0110(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0101(i,j);
-							else
-								setupSquare0100(i,j);
-						}
-					}
-					else
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0011(i,j);
-							else
-								setupSquare0010(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0001(i,j);
-							else
-								setupSquare0000(i,j);
-						}
-					}
-				}
-
-			}
-	}
-	cout << "a" <<std::endl;
-
-//	Real tmp = 0.0;
-//	Real ax=0.5/sqrt(2.0);
-//
-//	if(!exactInput)
-//	{
-//		for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++)
-//				dofVector[i]=0.5*h*sign(dofVector[i]);
-//	}
-//
-//
-//	for(Index i = 1; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		for(Index j = 1; j < Mesh.getDimensions().y()-1; j++)
-//		{
-//			 tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//			if(tmp == 0.0)
-//			{}
-//			else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//					dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//			{}
-//			else
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//		}
-//	}
-//
-//
-//
-//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		Index j = 0;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		Index j = Mesh.getDimensions().y() - 1;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
-//	{
-//		Index i = 0;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
-//	{
-//		Index i = Mesh.getDimensions().x() - 1;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//
-//	Index i = Mesh.getDimensions().x() - 1;
-//	Index j = Mesh.getDimensions().y() - 1;
-//
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//
-//
-//
-//	j = 0;
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//
-//
-//
-//	i = 0;
-//	j = Mesh.getDimensions().y() -1;
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//
-//
-//
-//	j = 0;
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-
-	//data.setLike(dofVector2.getData());
-	//data=dofVector2.getData();
-	//cout << data.getType() <<std::endl;
-	dofVector2.save("u-00000.tnl");
-	//dofVector2.getData().save("u-00000.tnl");
-
-	return true;
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-	{
-		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-//	data.setLike(dofVector2.getData());
-//	data = dofVector2.getData();
-//	cout << data.getType() <<std::endl;
-	dofVector2.save("u-00001.tnl");
-	//dofVector2.getData().save("u-00001.tnl");
-
-	return true;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	Real value = dofVector2[Entity.getIndex()];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-	else
-	{
-		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-				 dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-	}
-
-	if( j == 0 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-	else
-	{
-		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-	}
-
-
-	if(fabs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-
-	dofVector2[Entity.getIndex()] = fabsMin(value, tmp);
-
-//	if(dofVector2[Entity.getIndex()] > 1.0)
-//		cout << value << "    " << tmp << " " << dofVector2[Entity.getIndex()] <<std::endl;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = fabs(x);
-	Real fy = fabs(y);
-
-	Real tmpMin = Min(fx,fy);
-
-	if(tmpMin == fx)
-		return x;
-	else
-		return y;
-
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-//	this->Entity.setCoordinates(CoordinatesType(i,j));
-//	this->Entity.refresh();
-//	auto neighborEntities =  Entity.getNeighborEntities();
-//	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-//	this->Entity.setCoordinates(CoordinatesType(i,j));
-//	this->Entity.refresh();
-//	auto neighborEntities =  Entity.getNeighborEntities();
-//	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
-//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[Entity.getIndex()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[Entity.getIndex()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j). The pivot of the construction is the < 1, 1 > neighbour: the
- * approximating line a*x + b*y + c = 0 is built from the linear zero
- * crossings of dofVector on the two edges leaving that neighbour. The pivot
- * receives a non-negative candidate, the remaining three cells non-positive
- * ones; each candidate is merged with the stored value through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	// relative positions of the zero crossing on the two edges leaving the pivot
-	const Real alpha = abs( dofVector[ idx11 ] / ( dofVector[ idx10 ] - dofVector[ idx11 ] ) );
-	const Real beta  = abs( dofVector[ idx11 ] / ( dofVector[ idx01 ] - dofVector[ idx11 ] ) );
-
-	// coefficients of the approximating line and the scaling to grid units
-	const Real a = beta / alpha;
-	const Real b = 1.0;
-	const Real c = -beta;
-	const Real scale = h / sqrt( a*a + b*b );
-
-	dofVector2[ idx00 ] = fabsMin( -abs( a*1 + b*1 + c ) * scale, dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin( -abs( a*0 + b*1 + c ) * scale, dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin(  abs( a*0 + b*0 + c ) * scale, dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin( -abs( a*1 + b*0 + c ) * scale, dofVector2[ idx10 ] );
-}
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j). The pivot of the construction is the < 0, 1 > neighbour: the
- * approximating line a*x + b*y + c = 0 is built from the linear zero
- * crossings of dofVector on the two edges leaving that neighbour. Each
- * candidate is merged with the stored value through fabsMin.
- *
- * NOTE(review): the pivot and both cells adjacent to it get non-positive
- * candidates and only the < 1, 0 > cell a non-negative one, unlike
- * setupSquare0001/0111/1000 where the pivot alone has its own sign -
- * confirm this is intended.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	// relative positions of the zero crossing on the two edges leaving the pivot
-	const Real alpha = abs( dofVector[ idx01 ] / ( dofVector[ idx11 ] - dofVector[ idx01 ] ) );
-	const Real beta  = abs( dofVector[ idx01 ] / ( dofVector[ idx00 ] - dofVector[ idx01 ] ) );
-
-	// coefficients of the approximating line and the scaling to grid units
-	const Real a = beta / alpha;
-	const Real b = 1.0;
-	const Real c = -beta;
-	const Real scale = h / sqrt( a*a + b*b );
-
-	dofVector2[ idx00 ] = fabsMin( -abs( a*0 + b*1 + c ) * scale, dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin( -abs( a*0 + b*0 + c ) * scale, dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin( -abs( a*1 + b*0 + c ) * scale, dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin(  abs( a*1 + b*1 + c ) * scale, dofVector2[ idx10 ] );
-}
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j). The pivot of the construction is the < 1, 0 > neighbour: the
- * approximating line a*x + b*y + c = 0 is built from the linear zero
- * crossings of dofVector on the two edges leaving that neighbour. Each
- * candidate is merged with the stored value through fabsMin.
- *
- * NOTE(review): the pivot and both cells adjacent to it get non-positive
- * candidates and only the < 0, 1 > cell a non-negative one, unlike
- * setupSquare0001/0111/1000 where the pivot alone has its own sign -
- * confirm this is intended.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	// relative positions of the zero crossing on the two edges leaving the pivot
-	const Real alpha = abs( dofVector[ idx10 ] / ( dofVector[ idx00 ] - dofVector[ idx10 ] ) );
-	const Real beta  = abs( dofVector[ idx10 ] / ( dofVector[ idx11 ] - dofVector[ idx10 ] ) );
-
-	// coefficients of the approximating line and the scaling to grid units
-	const Real a = beta / alpha;
-	const Real b = 1.0;
-	const Real c = -beta;
-	const Real scale = h / sqrt( a*a + b*b );
-
-	dofVector2[ idx00 ] = fabsMin( -abs( a*1 + b*0 + c ) * scale, dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin(  abs( a*1 + b*1 + c ) * scale, dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin( -abs( a*0 + b*1 + c ) * scale, dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin( -abs( a*0 + b*0 + c ) * scale, dofVector2[ idx10 ] );
-}
-
-/**
- * Writes distance candidates into dofVector2 for the square of four cells
- * whose base cell is (i,j). The interface is approximated by the line
- * a*x + b*y + c = 0 built from the linear zero crossings of dofVector on the
- * two edges leaving the base cell. The base cell receives a non-negative
- * candidate, the other three cells non-positive ones; each candidate is
- * merged with the stored value through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	// relative positions of the zero crossing on the two edges leaving the base cell
-	const Real alpha = abs( dofVector[ idx00 ] / ( dofVector[ idx01 ] - dofVector[ idx00 ] ) );
-	const Real beta  = abs( dofVector[ idx00 ] / ( dofVector[ idx10 ] - dofVector[ idx00 ] ) );
-
-	// coefficients of the approximating line and the scaling to grid units
-	const Real a = beta / alpha;
-	const Real b = 1.0;
-	const Real c = -beta;
-	const Real scale = h / sqrt( a*a + b*b );
-
-	dofVector2[ idx00 ] = fabsMin(  abs( a*0 + b*0 + c ) * scale, dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin( -abs( a*1 + b*0 + c ) * scale, dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin( -abs( a*1 + b*1 + c ) * scale, dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin( -abs( a*0 + b*1 + c ) * scale, dofVector2[ idx10 ] );
-}
-
-
-
-
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j) when the interface crosses the two opposite edges
- * base -> < 0, 1 > and < 1, 0 > -> < 1, 1 >. The approximating line
- * a*x + b*y + c = 0 passes through the linear zero crossings on these two
- * edges. The base cell and < 1, 0 > receive non-negative candidates,
- * < 0, 1 > and < 1, 1 > non-positive ones; each candidate is merged with the
- * stored value through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	// relative positions of the zero crossing on the two crossed edges
-	const Real alpha = abs( dofVector[ idx00 ] / ( dofVector[ idx01 ] - dofVector[ idx00 ] ) );
-	const Real beta  = abs( dofVector[ idx10 ] / ( dofVector[ idx11 ] - dofVector[ idx10 ] ) );
-
-	// coefficients of the approximating line and the scaling to grid units
-	const Real a = alpha - beta;
-	const Real b = 1.0;
-	const Real c = -alpha;
-	const Real scale = h / sqrt( a*a + b*b );
-
-	dofVector2[ idx00 ] = fabsMin(  abs( a*0 + b*0 + c ) * scale, dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin( -abs( a*0 + b*1 + c ) * scale, dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin( -abs( a*1 + b*1 + c ) * scale, dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin(  abs( a*1 + b*0 + c ) * scale, dofVector2[ idx10 ] );
-}
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j) when the interface crosses the two opposite edges
- * base -> < 1, 0 > and < 0, 1 > -> < 1, 1 >. The approximating line
- * a*x + b*y + c = 0 passes through the linear zero crossings on these two
- * edges. The base cell and < 0, 1 > receive non-negative candidates,
- * < 1, 0 > and < 1, 1 > non-positive ones; each candidate is merged with the
- * stored value through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-
-	// relative position of the zero crossing on the edge base -> < 1, 0 >
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	// relative position of the zero crossing on the edge < 0, 1 > -> < 1, 1 >
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	// In the local coordinates used below (first coordinate 1 at < 0, 1 >,
-	// second coordinate 1 at < 1, 0 >) the crossings are (0,al) and (1,be),
-	// so the line through them is (al-be)*x + y - al = 0. The offset is
-	// therefore -al, as in setupSquare1100 and setupSquare0011 (it used to
-	// be -be, which moved the line away from the first crossing).
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-/**
- * Handles the square with base cell (i,j) without building an approximating
- * line: for each of the four cells the value of dofVector is merged into
- * dofVector2 through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	dofVector2[ idx00 ] = fabsMin( dofVector[ idx00 ], dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin( dofVector[ idx01 ], dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin( dofVector[ idx11 ], dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin( dofVector[ idx10 ], dofVector2[ idx10 ] );
-}
-
-
-
-
-
-
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j) when the interface crosses the two opposite edges
- * base -> < 0, 1 > and < 1, 0 > -> < 1, 1 >. The approximating line
- * a*x + b*y + c = 0 passes through the linear zero crossings on these two
- * edges. The base cell and < 1, 0 > receive non-positive candidates,
- * < 0, 1 > and < 1, 1 > non-negative ones; each candidate is merged with the
- * stored value through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	// relative positions of the zero crossing on the two crossed edges
-	const Real alpha = abs( dofVector[ idx00 ] / ( dofVector[ idx01 ] - dofVector[ idx00 ] ) );
-	const Real beta  = abs( dofVector[ idx10 ] / ( dofVector[ idx11 ] - dofVector[ idx10 ] ) );
-
-	// coefficients of the approximating line and the scaling to grid units
-	const Real a = alpha - beta;
-	const Real b = 1.0;
-	const Real c = -alpha;
-	const Real scale = h / sqrt( a*a + b*b );
-
-	dofVector2[ idx00 ] = fabsMin( -abs( a*0 + b*0 + c ) * scale, dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin(  abs( a*0 + b*1 + c ) * scale, dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin(  abs( a*1 + b*1 + c ) * scale, dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin( -abs( a*1 + b*0 + c ) * scale, dofVector2[ idx10 ] );
-}
-
-/**
- * Writes distance candidates into dofVector2 for the square with base cell
- * (i,j) when the interface crosses the two opposite edges
- * base -> < 1, 0 > and < 0, 1 > -> < 1, 1 >. The approximating line
- * a*x + b*y + c = 0 passes through the linear zero crossings on these two
- * edges. The base cell and < 0, 1 > receive non-positive candidates,
- * < 1, 0 > and < 1, 1 > non-negative ones; each candidate is merged with the
- * stored value through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-
-	// relative position of the zero crossing on the edge base -> < 1, 0 >
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	// relative position of the zero crossing on the edge < 0, 1 > -> < 1, 1 >
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	// In the local coordinates used below (first coordinate 1 at < 0, 1 >,
-	// second coordinate 1 at < 1, 0 >) the crossings are (0,al) and (1,be),
-	// so the line through them is (al-be)*x + y - al = 0. The offset is
-	// therefore -al, as in setupSquare1100 and setupSquare0011 (it used to
-	// be -be, which moved the line away from the first crossing).
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-/**
- * Handles the square with base cell (i,j) without building an approximating
- * line: for each of the four cells the value of dofVector is merged into
- * dofVector2 through fabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	this->Entity.setCoordinates( CoordinatesType( i, j ) );
-	this->Entity.refresh();
-	auto neighbors = Entity.getNeighborEntities();
-
-	// indices of the four cells of the square
-	const auto idx00 = Entity.getIndex();
-	const auto idx01 = neighbors.template getEntityIndex< 0,  1 >();
-	const auto idx11 = neighbors.template getEntityIndex< 1,  1 >();
-	const auto idx10 = neighbors.template getEntityIndex< 1,  0 >();
-
-	dofVector2[ idx00 ] = fabsMin( dofVector[ idx00 ], dofVector2[ idx00 ] );
-	dofVector2[ idx01 ] = fabsMin( dofVector[ idx01 ], dofVector2[ idx01 ] );
-	dofVector2[ idx11 ] = fabsMin( dofVector[ idx11 ], dofVector2[ idx11 ] );
-	dofVector2[ idx10 ] = fabsMin( dofVector[ idx10 ], dofVector2[ idx10 ] );
-}
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h
deleted file mode 100644
index 54bbe931e0ae305ac300027d7da850d705b4c309..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h
+++ /dev/null
@@ -1,399 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_openMP_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING2D_IMPL_H_
-#define TNLFASTSWEEPING2D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-/**
- * Returns the type name of this solver specialization, assembled from the
- * type names of the mesh, Real and Index.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	const auto meshName  = MeshType::getType();
-	const auto realName  = ::getType< Real >();
-	const auto indexName = ::getType< Index >();
-
-	return String( "tnlFastSweeping< " ) + meshName + ", " + realName + ", " + indexName + " >";
-}
-
-
-
-
-/**
- * Loads the mesh ("mesh" parameter) and the initial condition
- * ("initial-condition" parameter), stores the grid step h taken from the
- * x space step, reads the "exact-input" switch and finally runs initGrid().
- *
- * @param parameters  container with the "mesh", "initial-condition" and
- *                    "exact-input" entries
- * @return false when either file cannot be loaded, otherwise the result of
- *         initGrid()
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  // report the file that actually failed (the mesh file name used to be printed here)
-		  std::cerr << "I am not able to load the initial condition from the file " << initialCondition << "." <<std::endl;
-		   return false;
-	}
-
-	// only the x space step is used as the grid step
-	h = Mesh.getSpaceSteps().x();
-
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	// any value other than "no" enables the exact-input mode
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-#ifdef HAVE_OPENMP
-//	gridLock = (omp_lock_t*) malloc(sizeof(omp_lock_t)*Mesh.getDimensions().x()*Mesh.getDimensions().y());
-//
-//	for(int i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++)
-//			omp_init_lock(&gridLock[i]);
-#endif
-
-	return initGrid();
-}
-
-
-/**
- * Prepares dofVector for the sweeps. When the input is not exact, every
- * value is first replaced by 0.5*h times its sign. Afterwards each cell
- * whose value is non-zero and none of whose visited neighbours has the
- * opposite sign is overwritten with sign*INT_MAX; the remaining cells keep
- * their values. The interior, the four borders and the four corners are
- * processed in this order and the result is saved to "u-00000.tnl".
- *
- * @return always true
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	const Index nx = Mesh.getDimensions().x();
-	const Index ny = Mesh.getDimensions().y();
-
-	// linear index of the cell with coordinates (x,y)
-	const auto cellIndex = [this]( Index x, Index y )
-	{
-		return Mesh.getCellIndex( CoordinatesType( x, y ) );
-	};
-	// true when the value at (x,y) has the sign opposite to sgn
-	const auto opposite = [&]( Index x, Index y, Real sgn )
-	{
-		return dofVector[ cellIndex( x, y ) ] * sgn < 0.0;
-	};
-	// true when the value at (x,y) has the same non-zero sign as sgn
-	const auto sameSide = [&]( Index x, Index y, Real sgn )
-	{
-		return dofVector[ cellIndex( x, y ) ] * sgn > 0.0;
-	};
-	// marks the cell (x,y) as not yet computed while keeping its sign
-	const auto markFar = [&]( Index x, Index y, Real sgn )
-	{
-		dofVector[ cellIndex( x, y ) ] = sgn * INT_MAX;
-	};
-
-	if( !exactInput )
-	{
-		for( Index k = 0; k < nx * ny; k++ )
-			dofVector[ k ] = 0.5 * h * sign( dofVector[ k ] );
-	}
-
-	// interior cells: all four neighbours are inspected
-	for( Index x = 1; x < nx - 1; x++ )
-	{
-		for( Index y = 1; y < ny - 1; y++ )
-		{
-			const Real sgn = sign( dofVector[ cellIndex( x, y ) ] );
-			if( sgn == 0.0 )
-				continue;
-			if( opposite( x + 1, y, sgn ) || opposite( x - 1, y, sgn ) ||
-			    opposite( x, y + 1, sgn ) || opposite( x, y - 1, sgn ) )
-				continue;
-			markFar( x, y, sgn );
-		}
-	}
-
-	// border y = 0
-	for( Index x = 1; x < nx - 1; x++ )
-	{
-		const Index y = 0;
-		const Real sgn = sign( dofVector[ cellIndex( x, y ) ] );
-		if( sgn == 0.0 )
-			continue;
-		if( opposite( x + 1, y, sgn ) || opposite( x - 1, y, sgn ) || opposite( x, y + 1, sgn ) )
-			continue;
-		markFar( x, y, sgn );
-	}
-
-	// border y = ny - 1
-	for( Index x = 1; x < nx - 1; x++ )
-	{
-		const Index y = ny - 1;
-		const Real sgn = sign( dofVector[ cellIndex( x, y ) ] );
-		if( sgn == 0.0 )
-			continue;
-		if( opposite( x + 1, y, sgn ) || opposite( x - 1, y, sgn ) || opposite( x, y - 1, sgn ) )
-			continue;
-		markFar( x, y, sgn );
-	}
-
-	// border x = 0
-	for( Index y = 1; y < ny - 1; y++ )
-	{
-		const Index x = 0;
-		const Real sgn = sign( dofVector[ cellIndex( x, y ) ] );
-		if( sgn == 0.0 )
-			continue;
-		if( opposite( x + 1, y, sgn ) || opposite( x, y + 1, sgn ) || opposite( x, y - 1, sgn ) )
-			continue;
-		markFar( x, y, sgn );
-	}
-
-	// border x = nx - 1
-	for( Index y = 1; y < ny - 1; y++ )
-	{
-		const Index x = nx - 1;
-		const Real sgn = sign( dofVector[ cellIndex( x, y ) ] );
-		if( sgn == 0.0 )
-			continue;
-		if( opposite( x - 1, y, sgn ) || opposite( x, y + 1, sgn ) || opposite( x, y - 1, sgn ) )
-			continue;
-		markFar( x, y, sgn );
-	}
-
-	// corners: marked only when both border neighbours lie strictly on the same side
-	Real sgn = sign( dofVector[ cellIndex( nx - 1, ny - 1 ) ] );
-	if( sameSide( nx - 2, ny - 1, sgn ) && sameSide( nx - 1, ny - 2, sgn ) )
-		markFar( nx - 1, ny - 1, sgn );
-
-	sgn = sign( dofVector[ cellIndex( nx - 1, 0 ) ] );
-	if( sameSide( nx - 2, 0, sgn ) && sameSide( nx - 1, 1, sgn ) )
-		markFar( nx - 1, 0, sgn );
-
-	sgn = sign( dofVector[ cellIndex( 0, ny - 1 ) ] );
-	if( sameSide( 1, ny - 1, sgn ) && sameSide( 0, ny - 2, sgn ) )
-		markFar( 0, ny - 1, sgn );
-
-	sgn = sign( dofVector[ cellIndex( 0, 0 ) ] );
-	if( sameSide( 1, 0, sgn ) && sameSide( 0, 1, sgn ) )
-		markFar( 0, 0, sgn );
-
-	dofVector.save("u-00000.tnl");
-
-	return true;
-}
-
-
-
-/**
- * Performs one sweep in each of the four diagonal directions. Every sweep
- * works on its own copy of the data (dofVector, d2, d3, d4); with OpenMP the
- * four sweeps are placed in separate sections. Afterwards the four results
- * are merged cell by cell with fabsMin into dofVector, which is saved to
- * "u-00001.tnl".
- *
- * @return always true
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	// private working copies for the second, third and fourth sweep
-	DofVectorType d2,d3,d4;
-	d2.setLike(dofVector);
-	d2=dofVector;
-	d3.setLike(dofVector);
-	d3=dofVector;
-	d4.setLike(dofVector);
-	d4=dofVector;
-
-
-#ifdef HAVE_OPENMP
-#pragma omp parallel sections num_threads(4)
-	{
-	// the first structured block is the first section; its directive is optional
-	{
-#endif
-
-	// sweep 1: i ascending, j ascending
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-		{
-			updateValue(i,j,&dofVector);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-#ifdef HAVE_OPENMP
-	}
-#pragma omp section
-	{
-#endif
-	// sweep 2: i descending, j ascending
-	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-	{
-		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-		{
-			updateValue(i,j,&d2);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-#ifdef HAVE_OPENMP
-	}
-#pragma omp section
-	{
-#endif
-	// sweep 3: i descending, j descending
-	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			updateValue(i,j, &d3);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-#ifdef HAVE_OPENMP
-	}
-#pragma omp section
-	{
-#endif
-	// sweep 4: i ascending, j descending
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			updateValue(i,j, &d4);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-#ifdef HAVE_OPENMP
-	}
-	}
-#endif
-
-
-	// merge the four sweeps: keep the value with the smallest magnitude in every cell
-#ifdef HAVE_OPENMP
-#pragma omp parallel for num_threads(4) schedule(dynamic)
-#endif
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			// Index instead of int so that the cell index is not narrowed for wide index types
-			const Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-			dofVector[index] = fabsMin(dofVector[index], d2[index]);
-			dofVector[index] = fabsMin(dofVector[index], d3[index]);
-			dofVector[index] = fabsMin(dofVector[index], d4[index]);
-		}
-	}
-
-	dofVector.save("u-00001.tnl");
-
-	return true;
-}
-
-
-/**
- * Recomputes the value of the cell (i,j) of *grid from its neighbours.
- * In each axis the neighbour value with the smaller magnitude is taken (on
- * a border only the single existing neighbour is used). The candidate value
- * is derived from these two values and the grid step h, and the cell keeps
- * whichever of the old value and the candidate has the smaller magnitude.
- *
- * @param i     first coordinate of the cell
- * @param j     second coordinate of the cell
- * @param grid  vector that is read and updated in place
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, DofVectorType* grid)
-{
-	DofVectorType& u = *grid;
-	Index cell = Mesh.getCellIndex( CoordinatesType( i, j ) );
-	const Real current = u[ cell ];
-
-	// smallest-magnitude neighbour along the first axis
-	const Real a = ( i == 0 )                            ? u[ Mesh.template getCellNextToCell<1,0>( cell ) ]
-	             : ( i == Mesh.getDimensions().x() - 1 ) ? u[ Mesh.template getCellNextToCell<-1,0>( cell ) ]
-	             : fabsMin( u[ Mesh.template getCellNextToCell<-1,0>( cell ) ],
-	                        u[ Mesh.template getCellNextToCell<1,0>( cell ) ] );
-
-	// smallest-magnitude neighbour along the second axis
-	const Real b = ( j == 0 )                            ? u[ Mesh.template getCellNextToCell<0,1>( cell ) ]
-	             : ( j == Mesh.getDimensions().y() - 1 ) ? u[ Mesh.template getCellNextToCell<0,-1>( cell ) ]
-	             : fabsMin( u[ Mesh.template getCellNextToCell<0,-1>( cell ) ],
-	                        u[ Mesh.template getCellNextToCell<0,1>( cell ) ] );
-
-	// one-sided update when the neighbours differ by at least h, two-sided otherwise
-	Real candidate;
-	if( fabs( a - b ) >= h )
-		candidate = fabsMin( a, b ) + sign( current ) * h;
-	else
-		candidate = 0.5 * ( a + b + sign( current ) * sqrt( 2.0 * h * h - ( a - b ) * ( a - b ) ) );
-
-	u[ cell ] = fabsMin( current, candidate );
-}
-
-
-/**
- * Returns the argument with the smaller absolute value, sign included.
- * x is returned whenever Min( |x|, |y| ) compares equal to |x|.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	const Real absX = fabs( x );
-	const Real absY = fabs( y );
-	const Real smaller = Min( absX, absY );
-
-	return ( smaller == absX ) ? x : y;
-}
-
-
-
-
-#endif /* TNLFASTSWEEPING2D_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h
deleted file mode 100644
index 6a5195cfe4edda5754b8826158d6d9faa5701fa0..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h
+++ /dev/null
@@ -1,961 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping3D_CUDA_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING3D_IMPL_H_
-#define TNLFASTSWEEPING3D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-//__device__
-//double fabsMin( double x, double y)
-//{
-//	double fx = abs(x);
-//
-//	if(Min(fx,abs(y)) == fx)
-//		return x;
-//	else
-//		return y;
-//}
-//
-//__device__
-//double atomicFabsMin(double* address, double val)
-//{
-//	unsigned long long int* address_as_ull =
-//						  (unsigned long long int*)address;
-//	unsigned long long int old = *address_as_ull, assumed;
-//	do {
-//		assumed = old;
-//			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) ));
-//	} while (assumed != old);
-//	return __longlong_as_double(old);
-//}
-
-/**
- * Returns the type name of this solver specialization, assembled from the
- * type names of the mesh, Real and Index.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	const auto meshName  = MeshType::getType();
-	const auto realName  = ::getType< Real >();
-	const auto indexName = ::getType< Index >();
-
-	return String( "tnlFastSweeping< " ) + meshName + ", " + realName + ", " + indexName + " >";
-}
-
-
-
-
-/**
- * Loads the mesh ("mesh" parameter) and the initial condition
- * ("initial-condition" parameter), stores the grid step h, resets the
- * counter and reads the "exact-input" switch. With HAVE_CUDA the initial
- * condition is copied into the two device buffers and a copy of this solver
- * object is placed on the device; the initCUDA kernel is then launched.
- *
- * @param parameters  container with the "mesh", "initial-condition" and
- *                    "exact-input" entries
- * @return false when either file cannot be loaded, true otherwise
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  // report the file that actually failed (the mesh file name used to be printed here)
-		  std::cerr << "I am not able to load the initial condition from the file " << initialCondition << "." <<std::endl;
-		   return false;
-	}
-
-	this->h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	// any value other than "no" enables the exact-input mode
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	// both device buffers start as copies of the loaded initial condition
-	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	// byte-wise copy of this solver object for use inside the kernels
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	// NOTE(review): the launch below sits outside the HAVE_CUDA guard and the
-	// launch grid is sized from the x dimension only - confirm that non-CUDA
-	// builds never include this file and that the mesh is cubic.
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(8, 8,8);
-	dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1);
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-/**
- * Launches the runCUDA kernel, copies cudaDofVector2 back into dofVector,
- * releases the device buffers and the device copy of the solver, and saves
- * the result to "u-00001.tnl".
- *
- * The device memory is freed here, so init() has to be called again before
- * another run().
- *
- * @return always true
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	// fixed launch configuration: 8 blocks of 1 x 1024 threads
-	dim3 threadsPerBlock(1, 1024);
-	dim3 numBlocks(8,1);
-
-
-	runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0);
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	// the kernel result lives in the second device buffer
-	cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-/**
- * Recomputes the value of the cell (i,j,k) in cudaDofVector2 from its
- * neighbours. In each axis the neighbour value with the smaller magnitude
- * is taken (on a border only the single existing neighbour is used). The
- * candidate derived from these three values and the grid step h is merged
- * into the cell with atomicFabsMin.
- */
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
-{
-	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates( CoordinatesType( i, j, k ) );
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbors(Entity);
-
-	const Real current = cudaDofVector2[ Entity.getIndex() ];
-
-	// smallest-magnitude neighbour along the first axis
-	const Real a = ( i == 0 )                            ? cudaDofVector2[ neighbors.template getEntityIndex< 1,  0,  0 >() ]
-	             : ( i == Mesh.getDimensions().x() - 1 ) ? cudaDofVector2[ neighbors.template getEntityIndex< -1,  0,  0 >() ]
-	             : fabsMin( cudaDofVector2[ neighbors.template getEntityIndex< -1,  0,  0 >() ],
-	                        cudaDofVector2[ neighbors.template getEntityIndex< 1,  0,  0 >() ] );
-
-	// smallest-magnitude neighbour along the second axis
-	const Real b = ( j == 0 )                            ? cudaDofVector2[ neighbors.template getEntityIndex< 0,  1,  0 >() ]
-	             : ( j == Mesh.getDimensions().y() - 1 ) ? cudaDofVector2[ neighbors.template getEntityIndex< 0,  -1,  0 >() ]
-	             : fabsMin( cudaDofVector2[ neighbors.template getEntityIndex< 0,  -1,  0 >() ],
-	                        cudaDofVector2[ neighbors.template getEntityIndex< 0,  1,  0 >() ] );
-
-	// smallest-magnitude neighbour along the third axis
-	const Real c = ( k == 0 )                            ? cudaDofVector2[ neighbors.template getEntityIndex< 0,  0,  1 >() ]
-	             : ( k == Mesh.getDimensions().z() - 1 ) ? cudaDofVector2[ neighbors.template getEntityIndex< 0,  0,  -1 >() ]
-	             : fabsMin( cudaDofVector2[ neighbors.template getEntityIndex< 0,  0,  -1 >() ],
-	                        cudaDofVector2[ neighbors.template getEntityIndex< 0,  0,  1 >() ] );
-
-	// discriminant of the three-neighbour update
-	const Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c);
-
-	Real candidate;
-	if( hD < 0.0 )
-		candidate = fabsMin( a, fabsMin( b, c ) ) + sign( current ) * h;
-	else
-		candidate = (1.0/3.0) * ( a + b + c + sign( current ) * sqrt( hD ) );
-
-	atomicFabsMin( &cudaDofVector2[ Entity.getIndex() ], candidate );
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k)
-{
-	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j,k));
-	Entity.refresh();
-	int gid = Entity.getIndex();
-
-	if(abs(cudaDofVector[gid]) < 1.0*h)
-		cudaDofVector2[gid] = 0.5*h;//cudaDofVector[gid];
-	else
-		cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]);
-
-	return true;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-	int gx = 0;
-	int gy = threadIdx.y;
-
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-
-	if(blockIdx.x==0)
-	{
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx = 0;
-		gy = threadIdx.y;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		__syncthreads();
-		}
-	}
-	else if(blockIdx.x==1)
-	{
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==2)
-	{
-
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==3)
-	{
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-
-
-
-
-	else if(blockIdx.x==4)
-	{
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx = 0;
-		gy = threadIdx.y;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==5)
-	{
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==6)
-	{
-
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==7)
-	{
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-
-
-
-
-}
-
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	int gz = blockDim.z*blockIdx.z + threadIdx.z;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz)
-	{
-		solver->initGrid(gx,gy,gz);
-	}
-
-
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-al;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//
-//
-//
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-al;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//}
-#endif
-
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h
deleted file mode 100644
index e22de0ab85f3814004b072ef0ebdb403b7f970c7..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping2D_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING3D_IMPL_H_
-#define TNLFASTSWEEPING3D_IMPL_H_
-
-#include "tnlFastSweeping.h"
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlFastSweeping< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping()
-:Entity(Mesh),
- dofVector(Mesh),
- dofVector2(Mesh)
-{
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-	dofVector2.load(initialCondition);
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >();
-	Entity.refresh();
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-//	cout << "bla "<<endl;
-	return initGrid();
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++)
-	{
-
-		if (abs(dofVector[i]) < 1.8*h)
-			dofVector2[i]=dofVector[i];
-		else
-			dofVector2[i]=INT_MAX*sign(dofVector[i]);
-	}
-
-	return true;
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-
-
-
-
-
-
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-	dofVector2.save("u-00001.tnl");
-
-	cout << "bla 3"<<endl;
-	return true;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j,k));
-	this->Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
-	Real value = dofVector2[Entity.getIndex()];
-	Real a,b,c, tmp;
-
-	if( i == 0 )
-		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-	else
-	{
-		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0>()],
-				 dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()] );
-	}
-
-	if( j == 0 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()];
-	else
-	{
-		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()] );
-	}
-
-	if( k == 0 )
-		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()];
-	else if( k == Mesh.getDimensions().z() - 1 )
-		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()];
-	else
-	{
-		c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()] );
-	}
-
-	Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c);
-
-	if(hD < 0.0)
-		tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h;
-	else
-		tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) );
-
-
-	dofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = fabs(x);
-	Real fy = fabs(y);
-
-	Real tmpMin = Min(fx,fy);
-
-	if(tmpMin == fx)
-		return x;
-	else
-		return y;
-
-}
-
-
-
-#endif /* TNLFASTSWEEPING_IMPL_H_ */
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h
deleted file mode 100644
index fc9eb545987dfa14bb17b28f4d104d4c6e6fa2e7..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* 
- * File:   tnlFastSweepingSolver.h
- * Author: oberhuber
- *
- * Created on July 12, 2016, 6:04 PM
- */
-
-#pragma once
-
-#include <functions/tnlConstantFunction.h>
-#include <problems/tnlPDEProblem.h>
-
-template< typename Mesh,
-          typename Communicator,
-          typename Anisotropy = tnlConstanstFunction< Mesh > >
-class tnlFastSweepingSolver  : public tnlPDEProblem< Mesh,
-                                                     Communicator,
-                                                     typename Mesh::RealType,
-                                                     typename Mesh::DeviceType,
-                                                     typename Mesh::IndexType  >
-{
-   public:
-
-      typedef typename DifferentialOperator::RealType RealType;
-      typedef typename Mesh::DeviceType DeviceType;
-      typedef typename DifferentialOperator::IndexType IndexType;
-
-      typedef tnlMeshFunction< Mesh > MeshFunctionType;
-      typedef tnlPDEProblem< Mesh, TimeDependentProblem, RealType, DeviceType, IndexType > BaseType;
-
-      using typename BaseType::MeshType;
-      using typename BaseType::DofVectorType;
-      using typename BaseType::MeshDependentDataType;
-};
-
-
diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h
deleted file mode 100644
index f531da431bfec5d16da8ea7deabe6595031a0873..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/***************************************************************************
-                          tnlFastSweeping_CUDA.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLFASTSWEEPING_H_
-#define TNLFASTSWEEPING_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-
-#include <functions/tnlMeshFunction.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-#include <ctime>
-
-
-
-
-
-template< typename Mesh,
-		  typename Real,
-		  typename Index >
-class tnlFastSweeping
-{};
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-	tnlFastSweeping();
-
-	__host__ static String getType();
-	__host__ bool init( const Config::ParameterContainer& parameters );
-	__host__ bool run();
-
-#ifdef HAVE_CUDA
-	__device__ bool initGrid();
-	__device__ void updateValue(const Index i, const Index j);
-	__device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3);
-	__device__ Real fabsMin(const Real x, const Real y);
-
-	tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
-	double* cudaDofVector;
-	double* cudaDofVector2;
-	int counter;
-	__device__ void setupSquare1000(Index i, Index j);
-	__device__ void setupSquare1100(Index i, Index j);
-	__device__ void setupSquare1010(Index i, Index j);
-	__device__ void setupSquare1001(Index i, Index j);
-	__device__ void setupSquare1110(Index i, Index j);
-	__device__ void setupSquare1101(Index i, Index j);
-	__device__ void setupSquare1011(Index i, Index j);
-	__device__ void setupSquare1111(Index i, Index j);
-	__device__ void setupSquare0000(Index i, Index j);
-	__device__ void setupSquare0100(Index i, Index j);
-	__device__ void setupSquare0010(Index i, Index j);
-	__device__ void setupSquare0001(Index i, Index j);
-	__device__ void setupSquare0110(Index i, Index j);
-	__device__ void setupSquare0101(Index i, Index j);
-	__device__ void setupSquare0011(Index i, Index j);
-	__device__ void setupSquare0111(Index i, Index j);
-#endif
-
-	MeshType Mesh;
-
-protected:
-
-
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector;
-	DofVectorType data;
-
-
-	RealType h;
-
-
-};
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-
-
-	__host__ static String getType();
-	__host__ bool init( const Config::ParameterContainer& parameters );
-	__host__ bool run();
-
-#ifdef HAVE_CUDA
-	__device__ bool initGrid(int i, int j, int k);
-	__device__ void updateValue(const Index i, const Index j, const Index k);
-	__device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3);
-	__device__ Real fabsMin(const Real x, const Real y);
-
-	tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
-	double* cudaDofVector;
-	double* cudaDofVector2;
-	int counter;
-#endif
-
-	MeshType Mesh;
-
-protected:
-
-
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector;
-	DofVectorType data;
-
-	RealType h;
-
-
-};
-
-
-
-
-
-
-
-#ifdef HAVE_CUDA
-//template<int sweep_t>
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver);
-#endif
-
-/*various implementtions.... choose one*/
-//#include "tnlFastSweeping2D_CUDA_impl.h"
-//#include "tnlFastSweeping2D_CUDA_v2_impl.h"
-//#include "tnlFastSweeping2D_CUDA_v3_impl.h"
-#include "tnlFastSweeping2D_CUDA_v4_impl.h"
-//#include "tnlFastSweeping2D_CUDA_v5_impl.h"
-
-
-#include "tnlFastSweeping3D_CUDA_impl.h"
-
-#endif /* TNLFASTSWEEPING_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt
deleted file mode 100644
index 48382df82de6e7e175e47d24fc3ce69e13c217b5..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-set( tnl_hamilton_jacobi_parallel_map_SOURCES
-#     MainBuildConfig.h
-#     tnlParallelMapSolver2D_impl.h
-#     tnlParallelMapSolver.h
-#     parallelMapConfig.h 
-#	  main.cu
-     main.cpp)
-
-
-IF(  BUILD_CUDA ) 
-	CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel-map main.cu)
-ELSE(  BUILD_CUDA )                
-	ADD_EXECUTABLE(hamilton-jacobi-parallel-map main.cpp)
-ENDIF( BUILD_CUDA )
-target_link_libraries (hamilton-jacobi-parallel-map tnl )
-
-
-INSTALL( TARGETS hamilton-jacobi-parallel-map
-         RUNTIME DESTINATION bin
-         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
-        
-#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_map_SOURCES}
-#         DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/hamilton-jacobi-parallel-map )
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h
deleted file mode 100644
index ed3d686eb99379af1589d734eac9b5812cccdedf..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/***************************************************************************
-                          MainBuildConfig.h  -  description
-                             -------------------
-    begin                : Jul 7, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef MAINBUILDCONFIG_H_
-#define MAINBUILDCONFIG_H_
-
-#include <solvers/tnlBuildConfigTags.h>
-
-class MainBuildConfig
-{
-   public:
-
-      static void print() {std::cerr << "MainBuildConfig" <<std::endl; }
-};
-
-/****
- * Turn off support for float and long double.
- */
-template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
-template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
-
-/****
- * Turn off support for short int and long int indexing.
- */
-template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
-template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
-
-/****
- * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
- */
-template< int Dimensions, typename Real, typename Device, typename Index >
-   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
-      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
-                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
-                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
-                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
-
-/****
- * Please, chose your preferred time discretisation  here.
- */
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
-
-/****
- * Only the Runge-Kutta-Merson solver is enabled by default.
- */
-template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
-
-#endif /* MAINBUILDCONFIG_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt
deleted file mode 100644
index d4ae61983910a676269a23e3d992f5f46ea83a8f..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-tomas@tomas-linux:~/Desktop/VU_CPU_MAPA/work_dir$ gnuplot
-
-	G N U P L O T
-	Version 4.6 patchlevel 4    last modified 2013-10-02 
-	Build System: Linux x86_64
-
-	Copyright (C) 1986-1993, 1998, 2004, 2007-2013
-	Thomas Williams, Colin Kelley and many others
-
-	gnuplot home:     http://www.gnuplot.info
-	faq, bugs, etc:   type "help FAQ"
-	immediate help:   type "help"  (plot window: hit 'h')
-
-Terminal type set to 'wxt'
-gnuplot> set cntrparam levels 15
-gnuplot> set cntrparam bspline
-gnuplot> set contour
-gnuplot> splot 'u-00001.gplt'
-
-gnuplot> unset surface
-gnuplot> splot 'u-00001.gplt'
-
-gnuplot> set table "test.gplt"
-gnuplot> splot 'u-00001.gplt'
-gnuplot> unset table
-
-gnuplot> set table "test2.gplt"
-gnuplot> plot 'test.gplt' index 10
-gnuplot> unset table
-
-gnuplot> plot 'test2.gplt' 
-
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp
deleted file mode 100644
index b13498e17330fae7bb00a0bdc2abcc7a19f8e7a8..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Jul 8 , 2014
-    copyright            : (C) 2014 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu
deleted file mode 100644
index 7101976712e153d73c5f0979b211164a36ec648d..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cu  -  description
-                             -------------------
-    begin                : Mar 30 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h
deleted file mode 100644
index fff21c77eb5980a3f3f86c28170b4169dd6f7917..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/***************************************************************************
-                          main.h  -  description
-                             -------------------
-    begin                : Mar 22 , 2016
-    copyright            : (C) 2016 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "tnlParallelMapSolver.h"
-#include "parallelMapConfig.h"
-#include "MainBuildConfig.h"
-#include <solvers/tnlBuildConfigTags.h>
-#include <operators/hamilton-jacobi/godunov-eikonal/parallelGodunovMap.h>
-#include <mesh/tnlGrid.h>
-#include <core/tnlDevice.h>
-#include <time.h>
-#include <ctime>
-
-typedef MainBuildConfig BuildConfig;
-
-int main( int argc, char* argv[] )
-{
-	time_t start;
-	time_t stop;
-	time(&start);
-	std::clock_t start2= std::clock();
-	Config::ParameterContainer parameters;
-	tnlConfigDescription configDescription;
-	parallelMapConfig< BuildConfig >::configSetup( configDescription );
-
-	if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
-	  return false;
-
-
-	tnlDeviceEnum device;
-	device = TNL::Devices::HostDevice;
-
-	const int& dim = parameters.getParameter< int >( "dim" );
-
-	if(dim == 2)
-	{
-
-	   typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost;
-/*#ifdef HAVE_CUDA
-		   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
-#endif
-#ifndef HAVE_CUDA*/
-	   typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice;
-/*#endif*/
-
-	   if(device==TNL::Devices::HostDevice)
-	   {
-		   typedef TNL::Devices::Host Device;
-
-
-		   tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-		   if(!solver.init(parameters))
-		   {
-			  std::cerr << "Solver failed to initialize." <<std::endl;
-			   return EXIT_FAILURE;
-		   }
-		  std::cout << "-------------------------------------------------------------" <<std::endl;
-		  std::cout << "Starting solver loop..." <<std::endl;
-		   solver.run();
-	   }
-	   else if(device==tnlCudaDevice )
-	   {
-		   typedef tnlCuda Device;
-//typedef parallelGodunovMapScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
-
-		   tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-		   if(!solver.init(parameters))
-		   {
-			  std::cerr << "Solver failed to initialize." <<std::endl;
-			   return EXIT_FAILURE;
-		   }
-		  std::cout << "-------------------------------------------------------------" <<std::endl;
-		  std::cout << "Starting solver loop..." <<std::endl;
-		   solver.run();
-	   }
-	}
-
-
-	time(&stop);
-	cout <<std::endl;
-	cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl;
-	return EXIT_SUCCESS;
-}
-
-
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png b/src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png
deleted file mode 100644
index 668b6fe24b17b2fec486db28505b41e3beb2091a..0000000000000000000000000000000000000000
Binary files a/src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png and /dev/null differ
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile
deleted file mode 100644
index bfdc1ef236ca02ecfe6bc88f81d872e9524ec621..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile
+++ /dev/null
@@ -1,41 +0,0 @@
-TNL_VERSION=0.1
-TNL_INSTALL_DIR=${HOME}/local/lib
-TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION}
-
-TARGET = hamiltonJacobiParallelSolver
-#CONFIG_FILE = $(TARGET).cfg.desc
-INSTALL_DIR = ${HOME}/local
-CXX = g++
-CUDA_CXX = nvcc
-OMP_FLAGS = -DHAVE_OPENMP -fopenmp
-CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG
-LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp
-
-SOURCES = main.cpp
-HEADERS = 
-OBJECTS = main.o
-DIST = $(SOURCES) Makefile
-
-all: $(TARGET)
-clean: 
-	rm -f $(OBJECTS)
-	rm -f $(TARGET)-conf.h	
-
-dist: $(DIST)
-	tar zcvf $(TARGET).tgz $(DIST) 
-
-install: $(TARGET)
-	cp $(TARGET) $(INSTALL_DIR)/bin
-	cp $(CONFIG_FILE) $(INSTALL_DIR)/share
-
-uninstall: $(TARGET)
-	rm -f $(INSTALL_DIR)/bin/$(TARGET) 
-	rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share
-
-$(TARGET): $(OBJECTS)
-	$(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS)
-
-%.o: %.cpp $(HEADERS)
-	$(CXX) -c -o $@ $(CXX_FLAGS) $<
-
-
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h
deleted file mode 100644
index c07ee95aa04bb8c7a1f3cc376aabd859ff7bc5be..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/***************************************************************************
-                          parallelMapConfig.h  -  description
-                             -------------------
-    begin                : Mar 22 , 2016
-    copyright            : (C) 2016 by Tomas Sobotik
-    email                :
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_
-#define HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_
-
-#include <config/tnlConfigDescription.h>
-
-template< typename ConfigTag >
-class parallelMapConfig
-{
-   public:
-      static void configSetup( tnlConfigDescription& config )
-      {
-         config.addDelimiter( "Parallel Eikonal solver settings:" );
-         config.addEntry        < String > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" );
-         config.addEntry       < String > ( "scheme", "This defines scheme used for discretization.", "godunov" );
-         config.addEntryEnum( "godunov" );
-         config.addEntryEnum( "upwind" );
-         config.addRequiredEntry        < String > ( "initial-condition", "Initial condition for solver");
-         config.addRequiredEntry        < String > ( "map", "Gradient map for solver");
-         config.addEntry       < String > ( "mesh", "Name of mesh.", "mesh.tnl" );
-         config.addEntry        < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 );
-         config.addEntry        < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 );
-         config.addRequiredEntry        < double > ( "stop-time", " Final time for solver");
-         config.addRequiredEntry        < double > ( "initial-tau", " initial tau for solver" );
-         config.addEntry        < double > ( "cfl-condition", " CFL condition", 0.0 );
-         config.addEntry        < int > ( "subgrid-size", "Subgrid size.", 16 );
-         config.addRequiredEntry        < int > ( "dim", "Dimension of problem.");
-      }
-};
-
-#endif /* HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/run b/src/TNL/Legacy/hamilton-jacobi-parallel-map/run
deleted file mode 100755
index 48441996274633f8d391d9b32978b05b2e4fa263..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/run
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-dimensions=2
-
-size=2
-
-time=50
-
-rm -r work_dir
-mkdir work_dir
-cp mapa_png.png work_dir/mapa_png.png
-cd  work_dir
-
-tnl-image-converter 		--image-format png\
-		    		--input-images mapa_png.png
-
-
-tnl-init 			--test-function sdf-para \
-	     			--x-centre 0.5 \
-	    			--y-centre 1.0 \
- 	   			--offset 0.05 \
-           			--output-file init.tnl \
-	     			--final-time 0.0 \
-	     			--snapshot-period 0.1
-
-hamilton-jacobi-parallel-map-dbg 	--initial-condition init.tnl \
-				--map mapa_png.tnl \
-              			--cfl-condition 50 \
-	      	  		--mesh mesh.tnl \
-	     	  		--initial-tau 1.0e-3 \
-	      	  		--epsilon 4.0 \
-        	  		--delta 0.0 \
-       	      			--stop-time $time \
-	          		--scheme godunov \
-	          		--subgrid-size 8 \
-		  		--dim $dimensions
-
-	
-#cp ../template.dat1 template.dat1
-#cp ../template.dat2 template.dat2
-#cp ../gplt2eps.py gplt2eps.py
-cd ..
-
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py
deleted file mode 100755
index f8cde3768e9b76156507e133f8bc3ecaa526fc71..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-
-import sys, string, math
-
-arguments = sys. argv[1:]
-format = "txt"
-output_file_name = "eoc-table.txt"
-input_files = []
-verbose = 1
-size = 1.0
-
-i = 0
-while i < len( arguments ):
-   if arguments[ i ] == "--format":
-      format = arguments[ i + 1 ]
-      i = i + 2
-      continue
-   if arguments[ i ] == "--output-file":
-      output_file_name = arguments[ i + 1 ]
-      i = i + 2
-      continue
-   if arguments[ i ] == "--verbose":
-       verbose = float( arguments[ i + 1 ] )
-       i = i +2
-       continue
-   if arguments[ i ] == "--size":
-       size = float( arguments[ i + 1 ] )
-       i = i +2
-       continue
-   input_files. append( arguments[ i ] )
-   i = i + 1
-
-if not verbose == 0:
-   print "Writing to " + output_file_name + " in " + format + "."
-
-h_list = []
-l1_norm_list = []
-l2_norm_list = []
-max_norm_list = []
-items = 0
-
-for file_name in input_files:
-   if not verbose == 0:
-       print "Processing file " + file_name
-   file = open( file_name, "r" )
-   
-   l1_max = 0.0
-   l_max_max = 0.0
-   file.readline();
-   file.readline();
-   for line in file. readlines():
-         data = string. split( line )
-         h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) )
-         l1_norm_list. append( float( data[ 1 ] ) )
-         l2_norm_list. append( float( data[ 2 ] ) )
-         max_norm_list. append( float( data[ 3 ] ) )
-         items = items + 1
-         if not verbose == 0:
-            print line
-   file. close()
-
-h_width = 12
-err_width = 15
-file = open( output_file_name, "w" )
-if format == "latex":
-      file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" )
-      file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" )
-      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" )
-      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" )
-      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" )
-      file. write( " " + string. rjust( " ", h_width ) + "&" +
-                string. rjust( "Error", err_width ) + "&" +
-                string. rjust( "{\\bf EOC}", err_width ) + "&" +
-                string. rjust( "Error", err_width ) + "&" +
-                string. rjust( "{\\bf EOC}", err_width ) + "&" +
-                string. rjust( "Error.", err_width ) + "&" +
-                string. rjust( "{\\bf EOC}", err_width ) +
-                "\\\\ \\hline \\hline \n")
-if format == "txt":
-    file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
-    file. write( "|       h      |     L1 Err.    |     L1 EOC.    |     L2 Err.    |      L2 EOC    |    MAX Err.    |     MAX EOC    |\n" )
-    file. write( "+==============+================+================+================+================+================+================+\n" )
-                  
-
-i = 0
-while i < items:
-   if i == 0:
-      if format == "latex":
-         file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" +
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + 
-                      string. rjust( " ", err_width ) + "&"+ 
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( " ", err_width ) + "&" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( " ", err_width ) + "\\\\\n" )
-      if format == "txt":
-         file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + 
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( " ", err_width ) + " |" +
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( " ", err_width ) + " |" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( " ", err_width ) + " |\n" )
-         file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
-      i = i + 1;
-      continue
-   if h_list[ i ] == h_list[ i - 1 ]:
-      print "Unable to count eoc since h[ " + \
-      str( i ) + " ] = h[ " + str( i - 1 ) + \
-      " ] = " + str( h_list[ i ] ) + ". \n"
-      file. write( " eoc error:  h[ " + \
-      str( i ) + " ] = h[ " + str( i - 1 ) + \
-      " ] = " + str( h_list[ i ] ) + ". \n" )
-   else:
-      h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] )
-      l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] )
-      l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] )
-      max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] )
-      if format == "latex":
-         file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" +
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" +
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" )
-      if format == "txt":
-         file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" +
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" +
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" )
-         file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
-   i = i + 1
-
-if format == "latex":
-   file. write( "\\hline \n" )
-   file. write( "\\end{tabular} \n" )
-    
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
deleted file mode 100644
index 400e163c9dcc8d536a478a0952aabf8ccbb1a2d8..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/***************************************************************************
-                          tnlParallelMapSolver.h  -  description
-                             -------------------
-    begin                : Mar 22 , 2016
-    copyright            : (C) 2016 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef TNLPARALLELMAPSOLVER_H_
-#define TNLPARALLELMAPSOLVER_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <functions/tnlMeshFunction.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-
-
-#include <ctime>
-
-#ifdef HAVE_CUDA
-#include <core/tnlCuda.h>
-#endif
-
-
-template< int Dimension,
-		  typename SchemeHost,
-		  typename SchemeDevice,
-		  typename Device,
-		  typename RealType = double,
-          typename IndexType = int >
-class tnlParallelMapSolver
-{};
-
-template<typename SchemeHost, typename SchemeDevice, typename Device>
-class tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >
-{
-public:
-
-	typedef SchemeDevice SchemeTypeDevice;
-	typedef SchemeHost SchemeTypeHost;
-	typedef Device DeviceType;
-	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType;
-	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType;
-	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType;
-#ifdef HAVE_CUDA
-	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA;
-	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA;
-	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA;
-#endif
-	tnlParallelMapSolver();
-	bool init( const Config::ParameterContainer& parameters );
-	void run();
-
-	void test();
-
-/*private:*/
-
-
-	void synchronize();
-
-	int getOwner( int i) const;
-
-	int getSubgridValue( int i ) const;
-
-	void setSubgridValue( int i, int value );
-
-	int getBoundaryCondition( int i ) const;
-
-	void setBoundaryCondition( int i, int value );
-
-	void stretchGrid();
-
-	void contractGrid();
-
-	VectorType getSubgrid( const int i ) const;
-
-	void insertSubgrid( VectorType u, const int i );
-
-	VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID,VectorType map);
-
-
-	tnlMeshFunction<MeshType> u0;
-	VectorType work_u, map_stretched, map;
-	IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
-	MeshType mesh, subMesh;
-
-//	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
-
-	SchemeHost schemeHost;
-	SchemeDevice schemeDevice;
-	double delta, tau0, stopTime,cflCondition;
-	int gridRows, gridCols, gridLevels, currentStep, n;
-
-	std::clock_t start;
-	double time_diff;
-
-
-	tnlDeviceEnum device;
-
-	tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
-	{
-		return this;
-	};
-
-#ifdef HAVE_CUDA
-
-	tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;
-
-	double* work_u_cuda;
-	double* map_stretched_cuda;
-
-	int* subgridValues_cuda;
-	int* boundaryConditions_cuda;
-	int* unusedCell_cuda;
-	int* calculationsCount_cuda;
-	double* tmpw;
-	double* tmp_map;
-
-
-	int* runcuda;
-	int run_host;
-
-
-	__device__ void getSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
-
-	__device__ void updateSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
-
-	__device__ void insertSubgridCUDA2D( double u, const int i );
-
-	__device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID);
-
-	__device__ int getOwnerCUDA2D( int i) const;
-
-	__device__ int getSubgridValueCUDA2D( int i ) const;
-
-	__device__ void setSubgridValueCUDA2D( int i, int value );
-
-	__device__ int getBoundaryConditionCUDA2D( int i ) const;
-
-	__device__ void setBoundaryConditionCUDA2D( int i, int value );
-
-#endif
-
-};
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#ifdef HAVE_CUDA
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void runCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initRunCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initCUDA2D( tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3, double* tmp_map_ptr);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-
-
-__device__
-double fabsMin( double x, double y)
-{
-	double fx = abs(x);
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-}
-
-__device__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-
-#endif
-
-#include "tnlParallelMapSolver2D_impl.h"
-#endif /* TNLPARALLELMAPSOLVER_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
deleted file mode 100644
index e8cbc6fc1619c8a936a0239a5b4e0056361412e3..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
+++ /dev/null
@@ -1,1315 +0,0 @@
-/***************************************************************************
-                          tnlParallelMapSolver2D_impl.h  -  description
-                             -------------------
-    begin                : Mar 22 , 2016
-    copyright            : (C) 2016 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef TNLPARALLELMAPSOLVER2D_IMPL_H_
-#define TNLPARALLELMAPSOLVER2D_IMPL_H_
-
-
-#include "tnlParallelMapSolver.h"
-#include <core/mfilename.h>
-
-
-
-
-#define MAP_SOLVER_MAX_VALUE 3
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver()
-{
-	this->device = TNL::Devices::HostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice   ---    vypocet na CPU
-
-#ifdef HAVE_CUDA
-	if(this->device == tnlCudaDevice)
-	{
-	run_host = 1;
-	}
-#endif
-
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test()
-{
-/*
-	for(int i =0; i < this->subgridValues.getSize(); i++ )
-	{
-		insertSubgrid(getSubgrid(i), i);
-	}
-*/
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-
-bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters )
-{
-	cout << "Initializating solver..." <<std::endl;
-	const String& meshLocation = parameters.getParameter <String>("mesh");
-	this->mesh.load( meshLocation );
-
-	this->n = parameters.getParameter <int>("subgrid-size");
-	cout << "Setting N to " << this->n <<std::endl;
-
-	this->subMesh.setDimensions( this->n, this->n );
-	this->subMesh.setDomain( Containers::StaticVector<2,double>(0.0, 0.0),
-							 Containers::StaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) );
-
-	this->subMesh.save("submesh.tnl");
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	this->u0.load( initialCondition );
-
-	/* LOAD MAP */
-	const String& mapFile = parameters.getParameter <String>("map");
-	if(! this->map.load( mapFile ))
-		cout << "Failed to load map file : " << mapFile <<std::endl;
-
-
-	this->delta = parameters.getParameter <double>("delta");
-	this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >();
-
-	cout << "Setting delta to " << this->delta <<std::endl;
-
-	this->tau0 = parameters.getParameter <double>("initial-tau");
-	cout << "Setting initial tau to " << this->tau0 <<std::endl;
-	this->stopTime = parameters.getParameter <double>("stop-time");
-
-	this->cflCondition = parameters.getParameter <double>("cfl-condition");
-	this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >());
-	cout << "Setting CFL to " << this->cflCondition <<std::endl;
-
-	stretchGrid();
-	this->stopTime /= (double)(this->gridCols);
-	this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0));
-	cout << "Setting stopping time to " << this->stopTime <<std::endl;
-
-	cout << "Initializating scheme..." <<std::endl;
-	if(!this->schemeHost.init(parameters))
-	{
-		cerr << "SchemeHost failed to initialize." <<std::endl;
-		return false;
-	}
-	cout << "Scheme initialized." <<std::endl;
-
-	test();
-
-	VectorType* tmp = new VectorType[subgridValues.getSize()];
-	bool containsCurve = false;
-
-#ifdef HAVE_CUDA
-
-	if(this->device == tnlCudaDevice)
-	{
-		cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >));
-		cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice);
-
-		double** tmpdev = NULL;
-		cudaMalloc(&tmpdev, sizeof(double*));
-		cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
-		cudaMalloc(&(this->tmp_map), this->map_stretched.getSize()*sizeof(double));
-		cudaMalloc(&(this->runcuda), sizeof(int));
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		int* tmpUC;
-		cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
-		cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
-
-		initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC, tmp_map);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		double* tmpu = NULL;
-		cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
-		cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-		cudaMemcpy((this->tmp_map), this->map_stretched.getData(), this->map_stretched.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-	}
-#endif
-
-	if(this->device == TNL::Devices::HostDevice)
-	{
-		VectorType tmp_map;
-		tmp_map.setSize(this->n * this->n);
-		for(int i = 0; i < this->subgridValues.getSize(); i++)
-		{
-
-			if(! tmp[i].setSize(this->n * this->n))
-				cout << "Could not allocate tmp["<< i <<"] array." <<std::endl;
-				tmp[i] = getSubgrid(i);
-			containsCurve = false;
-
-			for(int j = 0; j < tmp[i].getSize(); j++)
-			{
-				if(tmp[i][0]*tmp[i][j] <= 0.0)
-				{
-					containsCurve = true;
-					j=tmp[i].getSize();
-				}
-
-			}
-			if(containsCurve)
-			{
-				for( int j = 0; j < tmp_map.getSize(); j++)
-				{
-					tmp_map[j] = this->map_stretched[ (i / this->gridCols) * this->n*this->n*this->gridCols
-										 + (i % this->gridCols) * this->n
-										 + (j/this->n) * this->n*this->gridCols
-										 + (j % this->n) ];
-				}
-				//cout << "Computing initial SDF on subgrid " << i << "." <<std::endl;
-				tmp[i] = runSubgrid(0, tmp[i],i,tmp_map);
-				insertSubgrid(tmp[i], i);
-				setSubgridValue(i, 4);
-				//cout << "Computed initial SDF on subgrid " << i  << "." <<std::endl;
-			}
-			containsCurve = false;
-
-		}
-	}
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		dim3 threadsPerBlock(this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-	}
-#endif
-
-
-	this->currentStep = 1;
-	if(this->device == TNL::Devices::HostDevice)
-		synchronize();
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		dim3 threadsPerBlock(this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows);
-
-		synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-	}
-
-#endif
-	cout << "Solver initialized." <<std::endl;
-
-	return true;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run()
-{
-	if(this->device == TNL::Devices::HostDevice)
-	{
-		while ((this->boundaryConditions.max() > 0 )/* || !end*/)
-		{
-
-#ifdef HAVE_OPENMP
-#pragma omp parallel for num_threads(4) schedule(dynamic)
-#endif
-			for(int i = 0; i < this->subgridValues.getSize(); i++)
-			{
-				if(getSubgridValue(i) != INT_MAX)
-				{
-					VectorType tmp, tmp_map;
-					tmp.setSize(this->n * this->n);
-					tmp_map.setSize(this->n * this->n);
-					for( int j = 0; j < tmp_map.getSize(); j++)
-					{
-						tmp_map[j] = this->map_stretched[ (i / this->gridCols) * this->n*this->n*this->gridCols
-											 + (i % this->gridCols) * this->n
-											 + (j/this->n) * this->n*this->gridCols
-											 + (j % this->n) ];
-					}
-
-					if(getSubgridValue(i) == currentStep+4)
-					{
-
-						if(getBoundaryCondition(i) & 1)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(1, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-						if(getBoundaryCondition(i) & 2)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(2, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-						if(getBoundaryCondition(i) & 4)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(4, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-						if(getBoundaryCondition(i) & 8)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(8, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-					}
-					else
-					{
-
-						if(getBoundaryCondition(i) == 1)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(1, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-						if(getBoundaryCondition(i) == 2)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(2, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-						if(getBoundaryCondition(i) == 4)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(4, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-						if(getBoundaryCondition(i) == 8)
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(8, tmp ,i,tmp_map);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-					}
-
-					if(getBoundaryCondition(i) & 3)
-					{
-						//cout << "3 @ " << getBoundaryCondition(i) <<std::endl;
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(3, tmp ,i,tmp_map);
-						insertSubgrid( tmp, i);
-					}
-					if(getBoundaryCondition(i) & 5)
-					{
-						//cout << "5 @ " << getBoundaryCondition(i) <<std::endl;
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(5, tmp ,i,tmp_map);
-						insertSubgrid( tmp, i);
-					}
-					if(getBoundaryCondition(i) & 10)
-					{
-						//cout << "10 @ " << getBoundaryCondition(i) <<std::endl;
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(10, tmp ,i,tmp_map);
-						insertSubgrid( tmp, i);
-					}
-					if(getBoundaryCondition(i) & 12)
-					{
-						//cout << "12 @ " << getBoundaryCondition(i) <<std::endl;
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(12, tmp ,i,tmp_map);
-						insertSubgrid( tmp, i);
-					}
-
-
-					setBoundaryCondition(i, 0);
-
-					setSubgridValue(i, getSubgridValue(i)-1);
-
-				}
-			}
-			synchronize();
-		}
-	}
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		bool end_cuda = false;
-		dim3 threadsPerBlock(this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		bool* tmpb;
-		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		int i = 1;
-		time_diff = 0.0;
-		while (run_host || !end_cuda)
-		{
-			cout << "Computing at step "<< i++ <<std::endl;
-			if(run_host != 0 )
-				end_cuda = true;
-			else
-				end_cuda = false;
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			start = std::clock();
-			runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
-
-			//start = std::clock();
-			synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			//time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
-
-			cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost);
-		}
-		cout << "Solving time was: " << time_diff <<std::endl;
-
-		cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-
-		cudaDeviceSynchronize();
-	}
-#endif
-	contractGrid();
-	this->u0.save("u-00001.tnl");
-	cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <<std::endl;
-	cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <<std::endl;
-	cout << "Solver finished" <<std::endl;
-
-#ifdef HAVE_CUDA
-	if(this->device == tnlCudaDevice)
-	{
-		cudaFree(this->runcuda);
-		cudaFree(this->tmpw);
-		cudaFree(this->tmp_map);
-		cudaFree(this->cudaSolver);
-	}
-#endif
-
-}
-
-//north - 1, east - 2, west - 4, south - 8
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now
-{
-	cout << "Synchronizig..." <<std::endl;
-	int tmp1, tmp2;
-	int grid1, grid2;
-
-//	if(this->currentStep & 1)
-//	{
-		for(int j = 0; j < this->gridRows - 1; j++)
-		{
-			for (int i = 0; i < this->gridCols*this->n; i++)
-			{
-				tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i;
-				tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i;
-				grid1 = getSubgridValue(getOwner(tmp1));
-				grid2 = getSubgridValue(getOwner(tmp2));
-				if(getOwner(tmp1)==getOwner(tmp2))
-					cout << "i, j" << i << "," << j <<std::endl;
-				if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-				{
-					this->work_u[tmp2] = this->work_u[tmp1];
-					this->unusedCell[tmp2] = 0;
-					if(grid2 == INT_MAX)
-					{
-						setSubgridValue(getOwner(tmp2), -INT_MAX);
-					}
-					if(! (getBoundaryCondition(getOwner(tmp2)) & 8) )
-						setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8);
-				}
-				else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-				{
-					this->work_u[tmp1] = this->work_u[tmp2];
-					this->unusedCell[tmp1] = 0;
-					if(grid1 == INT_MAX)
-					{
-						setSubgridValue(getOwner(tmp1), -INT_MAX);
-					}
-					if(! (getBoundaryCondition(getOwner(tmp1)) & 1) )
-						setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1);
-				}
-			}
-		}
-
-//	}
-//	else
-//	{
-		for(int i = 1; i < this->gridCols; i++)
-		{
-			for (int j = 0; j < this->gridRows*this->n; j++)
-			{
-				tmp1 = this->gridCols*this->n*j + i*this->n - 1;
-				tmp2 = this->gridCols*this->n*j + i*this->n ;
-				grid1 = getSubgridValue(getOwner(tmp1));
-				grid2 = getSubgridValue(getOwner(tmp2));
-				if(getOwner(tmp1)==getOwner(tmp2))
-					cout << "i, j" << i << "," << j <<std::endl;
-				if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-				{
-					this->work_u[tmp2] = this->work_u[tmp1];
-					this->unusedCell[tmp2] = 0;
-					if(grid2 == INT_MAX)
-					{
-						setSubgridValue(getOwner(tmp2), -INT_MAX);
-					}
-					if(! (getBoundaryCondition(getOwner(tmp2)) & 4) )
-						setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4);
-				}
-				else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-				{
-					this->work_u[tmp1] = this->work_u[tmp2];
-					this->unusedCell[tmp1] = 0;
-					if(grid1 == INT_MAX)
-					{
-						setSubgridValue(getOwner(tmp1), -INT_MAX);
-					}
-					if(! (getBoundaryCondition(getOwner(tmp1)) & 2) )
-						setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2);
-				}
-			}
-		}
-//	}
-
-
-	this->currentStep++;
-	int stepValue = this->currentStep + 4;
-	for (int i = 0; i < this->subgridValues.getSize(); i++)
-	{
-		if( getSubgridValue(i) == -INT_MAX )
-			setSubgridValue(i, stepValue);
-	}
-
-	cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl;
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const
-{
-
-	return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const
-{
-	return this->subgridValues[i];
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value)
-{
-	this->subgridValues[i] = value;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const
-{
-	return this->boundaryConditions[i];
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value)
-{
-	this->boundaryConditions[i] = value;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid()
-{
-	cout << "Stretching grid..." <<std::endl;
-
-
-	this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) );
-	this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) );
-
-
-	cout << "Setting gridCols to " << this->gridCols << "." <<std::endl;
-	cout << "Setting gridRows to " << this->gridRows << "." <<std::endl;
-
-	this->subgridValues.setSize(this->gridCols*this->gridRows);
-	this->subgridValues.setValue(0);
-	this->boundaryConditions.setSize(this->gridCols*this->gridRows);
-	this->boundaryConditions.setValue(0);
-	this->calculationsCount.setSize(this->gridCols*this->gridRows);
-	this->calculationsCount.setValue(0);
-
-	for(int i = 0; i < this->subgridValues.getSize(); i++ )
-	{
-		this->subgridValues[i] = INT_MAX;
-		this->boundaryConditions[i] = 0;
-	}
-
-	int stretchedSize = this->n*this->n*this->gridCols*this->gridRows;
-
-	if(!this->work_u.setSize(stretchedSize))
-		cerr << "Could not allocate memory for stretched grid." <<std::endl;
-	if(!this->map_stretched.setSize(stretchedSize))
-		cerr << "Could not allocate memory for stretched map." <<std::endl;
-	if(!this->unusedCell.setSize(stretchedSize))
-		cerr << "Could not allocate memory for supporting stretched grid." <<std::endl;
-	int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1);
-	cout << idealStretch <<std::endl;
-
-	for(int i = 0; i < stretchedSize; i++)
-	{
-		this->unusedCell[i] = 1;
-		int diff =(this->n*this->gridCols) - idealStretch ;
-		int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff;
-
-		if(i%(this->n*this->gridCols) - idealStretch  >= 0)
-		{
-			k+= i%(this->n*this->gridCols) - idealStretch +1 ;
-		}
-
-		if(i/(this->n*this->gridCols) - idealStretch + 1  > 0)
-		{
-			k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ;
-		}
-
-
-		if(fabs(this->u0[i-k]) < mesh.template getSpaceStepsProducts< 1, 0 >()+mesh.template getSpaceStepsProducts< 0, 1 >() )
-			this->work_u[i] = this->u0[i-k];
-		else
-			this->work_u[i] = sign(this->u0[i-k])*MAP_SOLVER_MAX_VALUE;
-
-		this->map_stretched[i] = this->map[i-k];
-	}
-
-
-	cout << "Grid stretched." <<std::endl;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid()
-{
-	cout << "Contracting grid..." <<std::endl;
-	int stretchedSize = this->n*this->n*this->gridCols*this->gridRows;
-
-	int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1);
-	cout << idealStretch <<std::endl;
-
-	for(int i = 0; i < stretchedSize; i++)
-	{
-		int diff =(this->n*this->gridCols) - idealStretch ;
-		int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff;
-
-		if((i%(this->n*this->gridCols) - idealStretch  < 0) && (i/(this->n*this->gridCols) - idealStretch + 1  <= 0))
-		{
-			this->u0[i-k] = this->work_u[i];
-		}
-
-	}
-
-	cout << "Grid contracted" <<std::endl;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType
-tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const
-{
-	VectorType u;
-	u.setSize(this->n*this->n);
-
-	for( int j = 0; j < u.getSize(); j++)
-	{
-		u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols
-		                     + (i % this->gridCols) * this->n
-		                     + (j/this->n) * this->n*this->gridCols
-		                     + (j % this->n) ];
-	}
-	return u;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i )
-{
-
-	for( int j = 0; j < this->n*this->n; j++)
-	{
-		int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n);
-		if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) )
-		{
-			this->work_u[index] = u[j];
-			this->unusedCell[index] = 0;
-		}
-	}
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType
-tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID,VectorType map)
-{
-
-	VectorType fu;
-
-	fu.setLike(u);
-	fu.setValue( 0.0 );
-
-
-
-	bool tmp = false;
-	for(int i = 0; i < u.getSize(); i++)
-	{
-		if(u[0]*u[i] <= 0.0)
-			tmp=true;
-		int centerGID = (this->n*(subGridID / this->gridRows)+ (this->n >> 1))*(this->n*this->gridCols) + this->n*(subGridID % this->gridRows) + (this->n >> 1);
-		if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0)
-			tmp = true;
-	}
-
-
-	double value = sign(u[0]) * u.absMax();
-
-	if(tmp)
-	{}
-
-
-	//north - 1, east - 2, west - 4, south - 8
-	else if(boundaryCondition == 4)
-	{
-		for(int i = 0; i < this->n; i++)
-			for(int j = 1;j < this->n; j++)
-				//if(fabs(u[i*this->n + j]) <  fabs(u[i*this->n]))
-				u[i*this->n + j] = value;// u[i*this->n];
-	}
-	else if(boundaryCondition == 2)
-	{
-		for(int i = 0; i < this->n; i++)
-			for(int j =0 ;j < this->n -1; j++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1]))
-				u[i*this->n + j] = value;// u[(i+1)*this->n - 1];
-	}
-	else if(boundaryCondition == 1)
-	{
-		for(int j = 0; j < this->n; j++)
-			for(int i = 0;i < this->n - 1; i++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)]))
-				u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)];
-	}
-	else if(boundaryCondition == 8)
-	{
-		for(int j = 0; j < this->n; j++)
-			for(int i = 1;i < this->n; i++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[j]))
-				u[i*this->n + j] = value;// u[j];
-	}
-
-
-
-   double time = 0.0;
-   double currentTau = this->tau0;
-   double finalTime = this->stopTime;// + 3.0*(u.max() - u.min());
-   if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
-   double maxResidue( 1.0 );
-   tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-   for( int i = 0; i < u.getSize(); i ++ )
-   {
-		if(map[i] == 0.0)
-		{
-			u[i] = /*sign(u[l])**/MAP_SOLVER_MAX_VALUE;
-		}
-   }
-
-   while( time < finalTime )
-   {
-      /****
-       * Compute the RHS
-       */
-
-      for( int i = 0; i < fu.getSize(); i ++ )
-      {
-			Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()));
-			Entity.refresh();
-			neighborEntities.refresh(subMesh,Entity.getIndex());
-			if(map[i] != 0.0)
-				fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities,map);
-      }
-      maxResidue = fu. absMax();
-
-
-      if(maxResidue != 0.0)
-    	  currentTau =  fabs(this -> cflCondition / maxResidue);
-
-
-      if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >())
-      {
-    	  currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >();
-      }
-
-
-      if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
-
-
-      for( int i = 0; i < fu.getSize(); i ++ )
-      {
-    	  if(map[i] != 0.0)
-    		  u[ i ] += currentTau * fu[ i ];
-      }
-      time += currentTau;
-
-   }
-   return u;
-}
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
-{
-	int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols
-            + (blockIdx.x) * caller->n
-            + threadIdx.y * caller->n*caller->gridCols
-            + threadIdx.x;
-
-	*a = caller->work_u_cuda[th];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
-{
-	int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols
-            + (blockIdx.x) * caller->n
-            + threadIdx.y * caller->n*caller->gridCols
-            + threadIdx.x;
-
-	if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) )
-	{
-		caller->work_u_cuda[index] = *a;
-		caller->unusedCell_cuda[index] = 0;
-
-	}
-
-	*a = caller->work_u_cuda[index];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i )
-{
-		int index = (blockIdx.y)*this->n*this->n*this->gridCols
-					+ (blockIdx.x)*this->n
-					+ threadIdx.y*this->n*this->gridCols
-					+ threadIdx.x;
-
-		if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) )
-		{
-			this->work_u_cuda[index] = u;
-			this->unusedCell_cuda[index] = 0;
-
-		}
-
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID)
-{
-
-	__shared__ int tmp;
-	__shared__ double value;
-	volatile double* sharedTau = &u[blockDim.x*blockDim.y];
-	double* map_local = &u[2*blockDim.x*blockDim.y];
-
-	int i = threadIdx.x;
-	int j = threadIdx.y;
-	int l = threadIdx.y * blockDim.x + threadIdx.x;
-	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
-
-	/* LOAD MAP */
-	map_local[l]=this->map_stretched_cuda[gid];
-	if(map_local[l] != 0.0)
-		map_local[l] = 1.0/map_local[l];
-	/* LOADED */
-
-	bool computeFU = !((i == 0 && (boundaryCondition & 4)) or
-			 (i == blockDim.x - 1 && (boundaryCondition & 2)) or
-			 (j == 0 && (boundaryCondition & 8)) or
-			 (j == blockDim.y - 1  && (boundaryCondition & 1)));
-
-	if(l == 0)
-	{
-		tmp = 0;
-		int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1);
-		if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0)
-			tmp = 1;
-	}
-	__syncthreads();
-
-
-	if(tmp !=1)
-	{
-		if(computeFU)
-		{
-			if(boundaryCondition == 4)
-				u[l] = u[threadIdx.y * blockDim.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x);
-			else if(boundaryCondition == 2)
-				u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);
-			else if(boundaryCondition == 8)
-				u[l] = u[threadIdx.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y);
-			else if(boundaryCondition == 1)
-				u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y);
-		}
-	}
-
-   double time = 0.0;
-   __shared__ double currentTau;
-   double cfl = this->cflCondition;
-   double fu = 0.0;
-
-   double finalTime = this->stopTime;
-   if(boundaryCondition == 0)
-	   finalTime*=2.0;
-   __syncthreads();
-
-   tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-   Entity.setCoordinates(Containers::StaticVector<2,int>(i,j));
-   Entity.refresh();
-   neighborEntities.refresh(subMesh,Entity.getIndex());
-
-
-	if(map_local[l] == 0.0)
-	{
-		u[l] = /*sign(u[l])**/MAP_SOLVER_MAX_VALUE;
-		computeFU = false;
-	}
-	__syncthreads();
-
-
-   while( time < finalTime )
-   {
-	  sharedTau[l] = finalTime;
-
-	  if(computeFU)
-	  {
-		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j), u, time, boundaryCondition, neighborEntities, map_local);
-	  	  sharedTau[l]=abs(cfl/fu);
-	  }
-
-
-
-      if(l == 0)
-      {
-    	  if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >())	sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >();
-      }
-      else if(l == blockDim.x*blockDim.y - 1)
-    	  if( time + sharedTau[l] > finalTime )		sharedTau[l] = finalTime - time;
-
-
-      if((blockDim.x == 16) && (l < 128))		sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]);
-      __syncthreads();
-      if((blockDim.x == 16) && (l < 64))		sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]);
-      __syncthreads();
-      if(l < 32)    							sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]);
-      if(l < 16)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]);
-      if(l < 8)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]);
-      if(l < 4)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]);
-      if(l < 2)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]);
-      if(l < 1)									currentTau   = Min(sharedTau[l],sharedTau[l+1]);
-      __syncthreads();
-
-      u[l] += currentTau * fu;
-      time += currentTau;
-   }
-
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const
-{
-
-	return ((i / (this->gridCols*this->n*this->n))*this->gridCols
-			+ (i % (this->gridCols*this->n))/this->n);
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const
-{
-	return this->subgridValues_cuda[i];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value)
-{
-	this->subgridValues_cuda[i] = value;
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const
-{
-	return this->boundaryConditions_cuda[i];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value)
-{
-	this->boundaryConditions_cuda[i] = value;
-}
-
-
-
-//north - 1, east - 2, west - 4, south - 8
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void synchronizeCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now
-{
-
-	__shared__ int boundary[4]; // north,east,west,south
-	__shared__ int subgridValue;
-	__shared__ int newSubgridValue;
-
-
-	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
-	double u = cudaSolver->work_u_cuda[gid];
-	double u_cmp;
-	int subgridValue_cmp=INT_MAX;
-	int boundary_index=0;
-
-
-	if(threadIdx.x+threadIdx.y == 0)
-	{
-		subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x);
-		boundary[0] = 0;
-		boundary[1] = 0;
-		boundary[2] = 0;
-		boundary[3] = 0;
-		newSubgridValue = 0;
-	}
-	__syncthreads();
-
-
-
-	if(		(threadIdx.x == 0 				 /*	&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.y == 0 				 /*	&& (cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.x == blockDim.x - 1 	 /*	&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.y == blockDim.y - 1 	 /*	&& (cudaSolver->currentStep & 1)*/) 		)
-	{
-		if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - 1];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1);
-			boundary_index = 2;
-		}
-
-		if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + 1];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1);
-			boundary_index = 1;
-		}
-
-		__threadfence();
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-			u=u_cmp;
-		}
-		__threadfence();
-		if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x);
-			boundary_index = 3;
-		}
-		if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x);
-			boundary_index = 0;
-		}
-
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-		}
-	}
-	__threadfence();
-	__syncthreads();
-
-	if(threadIdx.x+threadIdx.y == 0)
-	{
-		if(subgridValue == INT_MAX && newSubgridValue !=0)
-			cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX);
-
-		cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, 	boundary[0] +
-																				2 * boundary[1] +
-																				4 * boundary[2] +
-																				8 * boundary[3]);
-
-
-		if(blockIdx.x+blockIdx.y ==0)
-		{
-			cudaSolver->currentStep += 1;
-			*(cudaSolver->runcuda) = 0;
-		}
-	}
-
-}
-
-
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver)
-{
-
-
-	int stepValue = cudaSolver->currentStep + 4;
-	if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
-			cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue);
-
-	atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x));
-}
-
-
-
-
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr)
-{
-
-
-	cudaSolver->work_u_cuda = ptr;
-	cudaSolver->map_stretched_cuda = tmp_map_ptr;
-	cudaSolver->unusedCell_cuda = ptr3;
-	cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
-	cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
-	cudaSolver->runcuda = ptr2;
-	*(cudaSolver->runcuda) = 1;
-
-/* CHANGED !!!!!! from 1 to 0*/	cudaSolver->currentStep = 0;
-
-	printf("GPU memory allocated.\n");
-
-	for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++)
-	{
-		cudaSolver->subgridValues_cuda[i] = INT_MAX;
-		cudaSolver->boundaryConditions_cuda[i] = 0;
-	}
-
-	printf("GPU memory initialized.\n");
-}
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device >
-__global__
-void initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller)
-
-{
-	extern __shared__ double u[];
-
-	int i = blockIdx.y * gridDim.x + blockIdx.x;
-	int l = threadIdx.y * blockDim.x + threadIdx.x;
-
-	__shared__ int containsCurve;
-	if(l == 0)
-		containsCurve = 0;
-
-
-	caller->getSubgridCUDA2D(i,caller, &u[l]);
-	__syncthreads();
-
-	if(u[0] * u[l] <= 0.0)
-		atomicMax( &containsCurve, 1);
-
-	__syncthreads();
-	if(containsCurve == 1)
-	{
-		caller->runSubgridCUDA2D(0,u,i);
-		caller->insertSubgridCUDA2D(u[l],i);
-		__syncthreads();
-		if(l == 0)
-			caller->setSubgridValueCUDA2D(i, 4);
-	}
-
-
-}
-
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device >
-__global__
-void runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller)
-{
-	extern __shared__ double u[];
-	int i = blockIdx.y * gridDim.x + blockIdx.x;
-	int l = threadIdx.y * blockDim.x + threadIdx.x;
-	int bound = caller->getBoundaryConditionCUDA2D(i);
-
-	if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0)
-	{
-		caller->getSubgridCUDA2D(i,caller, &u[l]);
-
-
-		if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4)
-		{
-			if(bound & 1)
-			{
-				caller->runSubgridCUDA2D(1,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 2)
-			{
-				caller->runSubgridCUDA2D(2,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 4)
-			{
-				caller->runSubgridCUDA2D(4,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 8)
-			{
-				caller->runSubgridCUDA2D(8,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-		}
-		else
-		{
-
-			if(bound == 1)
-			{
-				caller->runSubgridCUDA2D(1,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound == 2)
-			{
-				caller->runSubgridCUDA2D(2,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound == 4)
-			{
-				caller->runSubgridCUDA2D(4,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound == 8)
-			{
-				caller->runSubgridCUDA2D(8,u,i);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-		}
-
-		if(bound & 3)
-		{
-			caller->runSubgridCUDA2D(3,u,i);
-			caller->updateSubgridCUDA2D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if(bound & 5)
-		{
-			caller->runSubgridCUDA2D(5,u,i);
-			caller->updateSubgridCUDA2D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if(bound & 10)
-		{
-			caller->runSubgridCUDA2D(10,u,i);
-			caller->updateSubgridCUDA2D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if(bound & 12)
-		{
-			caller->runSubgridCUDA2D(12,u,i);
-			caller->updateSubgridCUDA2D(i,caller, &u[l]);
-			__syncthreads();
-		}
-
-
-		if(l==0)
-		{
-			caller->setBoundaryConditionCUDA2D(i, 0);
-			caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 );
-		}
-
-
-	}
-
-
-
-}
-
-#endif /*HAVE_CUDA*/
-
-#endif /* TNLPARALLELMAPSOLVER2D_IMPL_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt
deleted file mode 100644
index f6a00127c7f79344a0c9303c1a0f4b2a8ad84832..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-set( tnl_hamilton_jacobi_parallel_SOURCES
-#     MainBuildConfig.h
-#     tnlParallelEikonalSolver2D_impl.h
-#     tnlParallelEikonalSolver3D_impl.h
-#     tnlParallelEikonalSolver.h
-#     parallelEikonalConfig.h 
-     main.cpp)
-
-
-IF(  BUILD_CUDA ) 
-	CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel main.cu)
-ELSE(  BUILD_CUDA )                
-	ADD_EXECUTABLE(hamilton-jacobi-parallel main.cpp)
-ENDIF( BUILD_CUDA )
-target_link_libraries (hamilton-jacobi-parallel tnl )
-
-
-INSTALL( TARGETS hamilton-jacobi-parallel
-         RUNTIME DESTINATION bin
-         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
-        
-#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_SOURCES}
-#         DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/hamilton-jacobi-parallel )
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h
deleted file mode 100644
index ed3d686eb99379af1589d734eac9b5812cccdedf..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/***************************************************************************
-                          MainBuildConfig.h  -  description
-                             -------------------
-    begin                : Jul 7, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef MAINBUILDCONFIG_H_
-#define MAINBUILDCONFIG_H_
-
-#include <solvers/tnlBuildConfigTags.h>
-
-class MainBuildConfig
-{
-   public:
-
-      static void print() {std::cerr << "MainBuildConfig" <<std::endl; }
-};
-
-/****
- * Turn off support for float and long double.
- */
-template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
-template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
-
-/****
- * Turn off support for short int and long int indexing.
- */
-template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
-template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
-
-/****
- * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
- */
-template< int Dimensions, typename Real, typename Device, typename Index >
-   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
-      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
-                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
-                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
-                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
-
-/****
- * Please, chose your preferred time discretisation  here.
- */
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
-
-/****
- * Only the Runge-Kutta-Merson solver is enabled by default.
- */
-template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
-
-#endif /* MAINBUILDCONFIG_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp
deleted file mode 100644
index b13498e17330fae7bb00a0bdc2abcc7a19f8e7a8..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Jul 8 , 2014
-    copyright            : (C) 2014 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu
deleted file mode 100644
index 7101976712e153d73c5f0979b211164a36ec648d..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cu  -  description
-                             -------------------
-    begin                : Mar 30 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel/main.h
deleted file mode 100644
index dbaebdcebd3b2bdf0509eda61729c1b11579716a..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/***************************************************************************
-                          main.h  -  description
-                             -------------------
-    begin                : Mar 30 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "tnlParallelEikonalSolver.h"
-#include "parallelEikonalConfig.h"
-#include "MainBuildConfig.h"
-#include <solvers/tnlBuildConfigTags.h>
-#include <operators/hamilton-jacobi/godunov-eikonal/parallelGodunovEikonal.h>
-#include <mesh/tnlGrid.h>
-#include <core/tnlDevice.h>
-#include <time.h>
-#include <ctime>
-
-typedef MainBuildConfig BuildConfig;
-
-int main( int argc, char* argv[] )
-{
-	time_t start;
-	time_t stop;
-	time(&start);
-	std::clock_t start2= std::clock();
-   Config::ParameterContainer parameters;
-   tnlConfigDescription configDescription;
-   parallelEikonalConfig< BuildConfig >::configSetup( configDescription );
-
-   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
-      return false;
-
-   //if (parameters.GetParameter <String>("scheme") == "godunov")
-   //{
-   tnlDeviceEnum device;
-   device = TNL::Devices::HostDevice;
-
-   const int& dim = parameters.getParameter< int >( "dim" );
-
-  if(dim == 2)
-  {
-
-	   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost;
-		/*#ifdef HAVE_CUDA
-		   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
-		#endif
-		#ifndef HAVE_CUDA*/
-	   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice;
-		/*#endif*/
-
-	   if(device==TNL::Devices::HostDevice)
-	   {
-		   typedef TNL::Devices::Host Device;
-
-
-		   tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-		   if(!solver.init(parameters))
-		   {
-			  std::cerr << "Solver failed to initialize." <<std::endl;
-			   return EXIT_FAILURE;
-		   }
-		  std::cout << "-------------------------------------------------------------" <<std::endl;
-		  std::cout << "Starting solver loop..." <<std::endl;
-		   solver.run();
-	   }
-	   else if(device==tnlCudaDevice )
-	   {
-		   typedef tnlCuda Device;
-		   //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
-
-		   tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-		   if(!solver.init(parameters))
-		   {
-			  std::cerr << "Solver failed to initialize." <<std::endl;
-			   return EXIT_FAILURE;
-		   }
-		  std::cout << "-------------------------------------------------------------" <<std::endl;
-		  std::cout << "Starting solver loop..." <<std::endl;
-		   solver.run();
-	   }
-  // }
-  }
-  else if(dim == 3)
-  {
-
-	   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost;
-		/*#ifdef HAVE_CUDA
-		   typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
-		#endif
-		#ifndef HAVE_CUDA*/
-	   typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice;
-		/*#endif*/
-
-	   if(device==TNL::Devices::HostDevice)
-	   {
-		   typedef TNL::Devices::Host Device;
-
-
-		   tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-		   if(!solver.init(parameters))
-		   {
-			  std::cerr << "Solver failed to initialize." <<std::endl;
-			   return EXIT_FAILURE;
-		   }
-		  std::cout << "-------------------------------------------------------------" <<std::endl;
-		  std::cout << "Starting solver loop..." <<std::endl;
-		   solver.run();
-	   }
-	   else if(device==tnlCudaDevice )
-	   {
-		   typedef tnlCuda Device;
-		   //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
-
-		   tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver;
-		   if(!solver.init(parameters))
-		   {
-			  std::cerr << "Solver failed to initialize." <<std::endl;
-			   return EXIT_FAILURE;
-		   }
-		  std::cout << "-------------------------------------------------------------" <<std::endl;
-		  std::cout << "Starting solver loop..." <<std::endl;
-		   solver.run();
-	   }
- // }
-  }
-
-   time(&stop);
-  std::cout <<std::endl;
-  std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl;
-   return EXIT_SUCCESS;
-}
-
-
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile
deleted file mode 100644
index bfdc1ef236ca02ecfe6bc88f81d872e9524ec621..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile
+++ /dev/null
@@ -1,41 +0,0 @@
-TNL_VERSION=0.1
-TNL_INSTALL_DIR=${HOME}/local/lib
-TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION}
-
-TARGET = hamiltonJacobiParallelSolver
-#CONFIG_FILE = $(TARGET).cfg.desc
-INSTALL_DIR = ${HOME}/local
-CXX = g++
-CUDA_CXX = nvcc
-OMP_FLAGS = -DHAVE_OPENMP -fopenmp
-CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG
-LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp
-
-SOURCES = main.cpp
-HEADERS = 
-OBJECTS = main.o
-DIST = $(SOURCES) Makefile
-
-all: $(TARGET)
-clean: 
-	rm -f $(OBJECTS)
-	rm -f $(TARGET)-conf.h	
-
-dist: $(DIST)
-	tar zcvf $(TARGET).tgz $(DIST) 
-
-install: $(TARGET)
-	cp $(TARGET) $(INSTALL_DIR)/bin
-	cp $(CONFIG_FILE) $(INSTALL_DIR)/share
-
-uninstall: $(TARGET)
-	rm -f $(INSTALL_DIR)/bin/$(TARGET) 
-	rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share
-
-$(TARGET): $(OBJECTS)
-	$(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS)
-
-%.o: %.cpp $(HEADERS)
-	$(CXX) -c -o $@ $(CXX_FLAGS) $<
-
-
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h
deleted file mode 100644
index c27f5ebb39e5c4db31ed13d1a8e80b8ca8915d51..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/***************************************************************************
-                          parallelEikonalConfig.h  -  description
-                             -------------------
-    begin                : Oct 5, 2014
-    copyright            : (C) 2014 by Tomas Sobotik
-    email                :
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_
-#define HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_
-
-#include <config/tnlConfigDescription.h>
-
-template< typename ConfigTag >
-class parallelEikonalConfig
-{
-   public:
-      static void configSetup( tnlConfigDescription& config )
-      {
-         config.addDelimiter( "Parallel Eikonal solver settings:" );
-         config.addEntry        < String > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" );
-         config.addEntry       < String > ( "scheme", "This defines scheme used for discretization.", "godunov" );
-         config.addEntryEnum( "godunov" );
-         config.addEntryEnum( "upwind" );
-         config.addRequiredEntry        < String > ( "initial-condition", "Initial condition for solver");
-         config.addEntry       < String > ( "mesh", "Name of mesh.", "mesh.tnl" );
-         config.addEntry        < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 );
-         config.addEntry        < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 );
-         config.addRequiredEntry        < double > ( "stop-time", " Final time for solver");
-         config.addRequiredEntry        < double > ( "initial-tau", " initial tau for solver" );
-         config.addEntry        < double > ( "cfl-condition", " CFL condition", 0.0 );
-         config.addEntry        < int > ( "subgrid-size", "Subgrid size.", 16 );
-         config.addRequiredEntry        < int > ( "dim", "Dimension of problem.");
-      }
-};
-
-#endif /* HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/run b/src/TNL/Legacy/hamilton-jacobi-parallel/run
deleted file mode 100755
index 3aece294a9c1189cd885acbe459dba20be713716..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/run
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-
-#GRID_SIZES="0897"
-GRID_SIZES="0008 0015 0029 0057 0113 0225 0449"
-#GRID_SIZES="1793"
-
-dimensions=2
-
-size=2
-
-time=3
-
-for grid_size in $GRID_SIZES;
-
-do
-
-	rm -r grid-${grid_size}
-   	mkdir grid-${grid_size}
-   	cd grid-${grid_size}
-
-	tnl-grid-setup --dimensions $dimensions \
-	               --origin-x -1.0 \
-	               --origin-y -1.0 \
-	               --origin-z -1.0 \
-	               --proportions-x $size \
-	               --proportions-y $size \
-	               --proportions-z $size \
-	               --size-x ${grid_size} \
-	               --size-y ${grid_size} \
-	               --size-z ${grid_size}
-
-	tnl-init --test-function sdf-para \
-		     --offset 0.25 \
-	             --output-file init.tnl \
-		     --final-time 0.0 \
-		     --snapshot-period 0.1 \
-
-
-	tnl-init --test-function sdf-para-sdf \
-		     --offset 0.25 \
-	             --output-file sdf.tnl \
-		     --final-time 0.0 \
-		     --snapshot-period 0.1
-
-	hamilton-jacobi-parallel --initial-condition init.tnl \
-	              --cfl-condition 1.0e-1 \
-		      	  --mesh mesh.tnl \
-		     	  --initial-tau 1.0e-3 \
-		      	  --epsilon 1.0 \
-	        	  --delta 0.0 \
-	       	      --stop-time $time \
-		          --scheme godunov \
-		          --subgrid-size 8
-
-        tnl-diff --mesh mesh.tnl --mode sequence --input-files sdf.tnl u-00001.tnl --write-difference yes --output-file ../${grid_size}.diff
-	
-	cd ..
-
-done
-
-
-./tnl-err2eoc-2.py --format txt --size $size *.diff
-
-              
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py
deleted file mode 100755
index f8cde3768e9b76156507e133f8bc3ecaa526fc71..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-
-import sys, string, math
-
-arguments = sys. argv[1:]
-format = "txt"
-output_file_name = "eoc-table.txt"
-input_files = []
-verbose = 1
-size = 1.0
-
-i = 0
-while i < len( arguments ):
-   if arguments[ i ] == "--format":
-      format = arguments[ i + 1 ]
-      i = i + 2
-      continue
-   if arguments[ i ] == "--output-file":
-      output_file_name = arguments[ i + 1 ]
-      i = i + 2
-      continue
-   if arguments[ i ] == "--verbose":
-       verbose = float( arguments[ i + 1 ] )
-       i = i +2
-       continue
-   if arguments[ i ] == "--size":
-       size = float( arguments[ i + 1 ] )
-       i = i +2
-       continue
-   input_files. append( arguments[ i ] )
-   i = i + 1
-
-if not verbose == 0:
-   print "Writing to " + output_file_name + " in " + format + "."
-
-h_list = []
-l1_norm_list = []
-l2_norm_list = []
-max_norm_list = []
-items = 0
-
-for file_name in input_files:
-   if not verbose == 0:
-       print "Processing file " + file_name
-   file = open( file_name, "r" )
-   
-   l1_max = 0.0
-   l_max_max = 0.0
-   file.readline();
-   file.readline();
-   for line in file. readlines():
-         data = string. split( line )
-         h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) )
-         l1_norm_list. append( float( data[ 1 ] ) )
-         l2_norm_list. append( float( data[ 2 ] ) )
-         max_norm_list. append( float( data[ 3 ] ) )
-         items = items + 1
-         if not verbose == 0:
-            print line
-   file. close()
-
-h_width = 12
-err_width = 15
-file = open( output_file_name, "w" )
-if format == "latex":
-      file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" )
-      file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" )
-      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" )
-      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" )
-      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" )
-      file. write( " " + string. rjust( " ", h_width ) + "&" +
-                string. rjust( "Error", err_width ) + "&" +
-                string. rjust( "{\\bf EOC}", err_width ) + "&" +
-                string. rjust( "Error", err_width ) + "&" +
-                string. rjust( "{\\bf EOC}", err_width ) + "&" +
-                string. rjust( "Error.", err_width ) + "&" +
-                string. rjust( "{\\bf EOC}", err_width ) +
-                "\\\\ \\hline \\hline \n")
-if format == "txt":
-    file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
-    file. write( "|       h      |     L1 Err.    |     L1 EOC.    |     L2 Err.    |      L2 EOC    |    MAX Err.    |     MAX EOC    |\n" )
-    file. write( "+==============+================+================+================+================+================+================+\n" )
-                  
-
-i = 0
-while i < items:
-   if i == 0:
-      if format == "latex":
-         file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" +
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + 
-                      string. rjust( " ", err_width ) + "&"+ 
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( " ", err_width ) + "&" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( " ", err_width ) + "\\\\\n" )
-      if format == "txt":
-         file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + 
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( " ", err_width ) + " |" +
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( " ", err_width ) + " |" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( " ", err_width ) + " |\n" )
-         file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
-      i = i + 1;
-      continue
-   if h_list[ i ] == h_list[ i - 1 ]:
-      print "Unable to count eoc since h[ " + \
-      str( i ) + " ] = h[ " + str( i - 1 ) + \
-      " ] = " + str( h_list[ i ] ) + ". \n"
-      file. write( " eoc error:  h[ " + \
-      str( i ) + " ] = h[ " + str( i - 1 ) + \
-      " ] = " + str( h_list[ i ] ) + ". \n" )
-   else:
-      h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] )
-      l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] )
-      l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] )
-      max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] )
-      if format == "latex":
-         file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" +
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" +
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" +
-                      string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" )
-      if format == "txt":
-         file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" +
-                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" +
-                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" +
-                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" +
-                      string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" )
-         file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
-   i = i + 1
-
-if format == "latex":
-   file. write( "\\hline \n" )
-   file. write( "\\end{tabular} \n" )
-    
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
deleted file mode 100644
index 19cdd949359d4349172af820def49169146c8717..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
+++ /dev/null
@@ -1,366 +0,0 @@
-/***************************************************************************
-                          tnlParallelEikonalSolver.h  -  description
-                             -------------------
-    begin                : Nov 28 , 2014
-    copyright            : (C) 2014 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef TNLPARALLELEIKONALSOLVER_H_
-#define TNLPARALLELEIKONALSOLVER_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <functions/tnlMeshFunction.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
- #include <omp.h>
-
-
-#include <ctime>
-
-#ifdef HAVE_CUDA
-#include <core/tnlCuda.h>
-#endif
-
-
-template< int Dimension,
-		  typename SchemeHost,
-		  typename SchemeDevice,
-		  typename Device,
-		  typename RealType = double,
-          typename IndexType = int >
-class tnlParallelEikonalSolver
-{};
-
-template<typename SchemeHost, typename SchemeDevice, typename Device>
-class tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >
-{
-public:
-
-	typedef SchemeDevice SchemeTypeDevice;
-	typedef SchemeHost SchemeTypeHost;
-	typedef Device DeviceType;
-	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType;
-	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType;
-	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType;
-#ifdef HAVE_CUDA
-	typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA;
-	typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA;
-	typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA;
-#endif
-	tnlParallelEikonalSolver();
-	bool init( const Config::ParameterContainer& parameters );
-	void run();
-
-	void test();
-
-/*private:*/
-
-
-	void synchronize();
-
-	int getOwner( int i) const;
-
-	int getSubgridValue( int i ) const;
-
-	void setSubgridValue( int i, int value );
-
-	int getBoundaryCondition( int i ) const;
-
-	void setBoundaryCondition( int i, int value );
-
-	void stretchGrid();
-
-	void contractGrid();
-
-	VectorType getSubgrid( const int i ) const;
-
-	void insertSubgrid( VectorType u, const int i );
-
-	VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID);
-
-
-	tnlMeshFunction<MeshType> u0;
-	VectorType work_u;
-	IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
-	MeshType mesh, subMesh;
-
-//	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
-
-	SchemeHost schemeHost;
-	SchemeDevice schemeDevice;
-	double delta, tau0, stopTime,cflCondition;
-	int gridRows, gridCols, gridLevels, currentStep, n;
-
-	std::clock_t start;
-	double time_diff;
-
-
-	tnlDeviceEnum device;
-
-	tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
-	{
-		return this;
-	};
-
-#ifdef HAVE_CUDA
-
-	tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;
-
-	double* work_u_cuda;
-
-	int* subgridValues_cuda;
-	int*boundaryConditions_cuda;
-	int* unusedCell_cuda;
-	int* calculationsCount_cuda;
-	double* tmpw;
-	//MeshTypeCUDA mesh_cuda, subMesh_cuda;
-	//SchemeDevice scheme_cuda;
-	//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
-	//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;
-
-	int* runcuda;
-	int run_host;
-
-
-	__device__ void getSubgridCUDA2D( const int i, tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
-
-	__device__ void updateSubgridCUDA2D( const int i, tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
-
-	__device__ void insertSubgridCUDA2D( double u, const int i );
-
-	__device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID);
-
-	/*__global__ void runCUDA();*/
-
-	//__device__ void synchronizeCUDA();
-
-	__device__ int getOwnerCUDA2D( int i) const;
-
-	__device__ int getSubgridValueCUDA2D( int i ) const;
-
-	__device__ void setSubgridValueCUDA2D( int i, int value );
-
-	__device__ int getBoundaryConditionCUDA2D( int i ) const;
-
-	__device__ void setBoundaryConditionCUDA2D( int i, int value );
-
-	//__device__ bool initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-	/*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, TNL::Devices::Host, int >* caller);*/
-
-#endif
-
-};
-
-
-
-
-
-
-
-	template<typename SchemeHost, typename SchemeDevice, typename Device>
-	class tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >
-	{
-	public:
-
-		typedef SchemeDevice SchemeTypeDevice;
-		typedef SchemeHost SchemeTypeHost;
-		typedef Device DeviceType;
-		typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType;
-		typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType;
-		typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshType;
-	#ifdef HAVE_CUDA
-		typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA;
-		typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA;
-		typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshTypeCUDA;
-	#endif
-		tnlParallelEikonalSolver();
-		bool init( const Config::ParameterContainer& parameters );
-		void run();
-
-		void test();
-
-	/*private:*/
-
-
-		void synchronize();
-
-		int getOwner( int i) const;
-
-		int getSubgridValue( int i ) const;
-
-		void setSubgridValue( int i, int value );
-
-		int getBoundaryCondition( int i ) const;
-
-		void setBoundaryCondition( int i, int value );
-
-		void stretchGrid();
-
-		void contractGrid();
-
-		VectorType getSubgrid( const int i ) const;
-
-		void insertSubgrid( VectorType u, const int i );
-
-		VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID);
-
-
-		tnlMeshFunction<MeshType> u0;
-		VectorType work_u;
-		IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
-		MeshType mesh, subMesh;
-		SchemeHost schemeHost;
-		SchemeDevice schemeDevice;
-		double delta, tau0, stopTime,cflCondition;
-		int gridRows, gridCols, gridLevels, currentStep, n;
-
-		std::clock_t start;
-		double time_diff;
-
-
-		tnlDeviceEnum device;
-
-		tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
-		{
-			return this;
-		};
-
-#ifdef HAVE_CUDA
-
-	tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;
-
-	double* work_u_cuda;
-
-	int* subgridValues_cuda;
-	int*boundaryConditions_cuda;
-	int* unusedCell_cuda;
-	int* calculationsCount_cuda;
-	double* tmpw;
-	//MeshTypeCUDA mesh_cuda, subMesh_cuda;
-	//SchemeDevice scheme_cuda;
-	//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
-	//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;
-
-	int* runcuda;
-	int run_host;
-
-
-	__device__ void getSubgridCUDA3D( const int i, tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
-
-	__device__ void updateSubgridCUDA3D( const int i, tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
-
-	__device__ void insertSubgridCUDA3D( double u, const int i );
-
-	__device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID);
-
-	/*__global__ void runCUDA();*/
-
-	//__device__ void synchronizeCUDA();
-
-	__device__ int getOwnerCUDA3D( int i) const;
-
-	__device__ int getSubgridValueCUDA3D( int i ) const;
-
-	__device__ void setSubgridValueCUDA3D( int i, int value );
-
-	__device__ int getBoundaryConditionCUDA3D( int i ) const;
-
-	__device__ void setBoundaryConditionCUDA3D( int i, int value );
-
-	//__device__ bool initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-	/*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, TNL::Devices::Host, int >* caller);*/
-
-#endif
-
-};
-
-
-
-
-
-
-#ifdef HAVE_CUDA
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void runCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initRunCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initCUDA2D( tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void synchronizeCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void synchronize2CUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-
-
-
-
-
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void runCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initRunCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initCUDA3D( tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void synchronizeCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void synchronize2CUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
-#endif
-
-
-#ifdef HAVE_CUDA
-__cuda_callable__
-double fabsMin( double x, double y)
-{
-	double fx = fabs(x);
-
-	if(Min(fx,fabs(y)) == fx)
-		return x;
-	else
-		return y;
-}
-
-__cuda_callable__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-
-#endif
-
-#include "tnlParallelEikonalSolver2D_impl.h"
-#include "tnlParallelEikonalSolver3D_impl.h"
-#endif /* TNLPARALLELEIKONALSOLVER_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
deleted file mode 100644
index 76cf49bc8aa28890d598fe010aa777acb2c6edfd..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h
+++ /dev/null
@@ -1,1928 +0,0 @@
-/***************************************************************************
-                          tnlParallelEikonalSolver2D_impl.h  -  description
-                             -------------------
-    begin                : Nov 28 , 2014
-    copyright            : (C) 2014 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef TNLPARALLELEIKONALSOLVER2D_IMPL_H_
-#define TNLPARALLELEIKONALSOLVER2D_IMPL_H_
-
-
-#include "tnlParallelEikonalSolver.h"
-#include <core/mfilename.h>
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
-{
-	cout << "a" <<std::endl;
-	this->device = tnlCudaDevice;  /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice   ---    vypocet na CPU
-
-#ifdef HAVE_CUDA
-	if(this->device == tnlCudaDevice)
-	{
-	run_host = 1;
-	}
-#endif
-
-	cout << "b" <<std::endl;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test()
-{
-/*
-	for(int i =0; i < this->subgridValues.getSize(); i++ )
-	{
-		insertSubgrid(getSubgrid(i), i);
-	}
-*/
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-
-bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters )
-{
-	cout << "Initializating solver..." <<std::endl;
-	const String& meshLocation = parameters.getParameter <String>("mesh");
-	this->mesh.load( meshLocation );
-
-	this->n = parameters.getParameter <int>("subgrid-size");
-	cout << "Setting N to " << this->n <<std::endl;
-
-	this->subMesh.setDimensions( this->n, this->n );
-	this->subMesh.setDomain( Containers::StaticVector<2,double>(0.0, 0.0),
-							 Containers::StaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) );
-
-	this->subMesh.save("submesh.tnl");
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	this->u0.load( initialCondition );
-
-	//cout << this->mesh.getCellCenter(0) <<std::endl;
-
-	this->delta = parameters.getParameter <double>("delta");
-	this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >();
-
-	cout << "Setting delta to " << this->delta <<std::endl;
-
-	this->tau0 = parameters.getParameter <double>("initial-tau");
-	cout << "Setting initial tau to " << this->tau0 <<std::endl;
-	this->stopTime = parameters.getParameter <double>("stop-time");
-
-	this->cflCondition = parameters.getParameter <double>("cfl-condition");
-	this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >());
-	cout << "Setting CFL to " << this->cflCondition <<std::endl;
-
-	stretchGrid();
-	this->stopTime /= (double)(this->gridCols);
-	this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0));
-	cout << "Setting stopping time to " << this->stopTime <<std::endl;
-	//this->stopTime = 1.5*((double)(this->n))*parameters.getParameter <double>("stop-time")*this->mesh.template getSpaceStepsProducts< 1, 0 >();
-	//cout << "Setting stopping time to " << this->stopTime <<std::endl;
-
-	cout << "Initializating scheme..." <<std::endl;
-	if(!this->schemeHost.init(parameters))
-	{
-		cerr << "SchemeHost failed to initialize." <<std::endl;
-		return false;
-	}
-	cout << "Scheme initialized." <<std::endl;
-
-	test();
-
-	VectorType* tmp = new VectorType[subgridValues.getSize()];
-	bool containsCurve = false;
-
-#ifdef HAVE_CUDA
-
-	if(this->device == tnlCudaDevice)
-	{
-	/*cout << "Testing... " <<std::endl;
-	if(this->device == tnlCudaDevice)
-	{
-	if( !initCUDA2D(parameters, gridRows, gridCols) )
-		return false;
-	}*/
-		//cout << "s" <<std::endl;
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >));
-	//cout << "s" <<std::endl;
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice);
-	//cout << "s" <<std::endl;
-	double** tmpdev = NULL;
-	cudaMalloc(&tmpdev, sizeof(double*));
-	//double* tmpw;
-	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
-	cudaMalloc(&(this->runcuda), sizeof(int));
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	int* tmpUC;
-	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
-	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
-
-	initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	//cout << "s " <<std::endl;
-	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
-	double* tmpu = NULL;
-
-	cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
-	//printf("%p %p \n",tmpu,tmpw);
-	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	//cout << "s "<<std::endl;
-
-	}
-#endif
-
-	if(this->device == TNL::Devices::HostDevice)
-	{
-	for(int i = 0; i < this->subgridValues.getSize(); i++)
-	{
-
-		if(! tmp[i].setSize(this->n * this->n))
-			cout << "Could not allocate tmp["<< i <<"] array." <<std::endl;
-			tmp[i] = getSubgrid(i);
-		containsCurve = false;
-
-		for(int j = 0; j < tmp[i].getSize(); j++)
-		{
-			if(tmp[i][0]*tmp[i][j] <= 0.0)
-			{
-				containsCurve = true;
-				j=tmp[i].getSize();
-			}
-
-		}
-		if(containsCurve)
-		{
-			//cout << "Computing initial SDF on subgrid " << i << "." <<std::endl;
-			tmp[i] = runSubgrid(0, tmp[i],i);
-			insertSubgrid(tmp[i], i);
-			setSubgridValue(i, 4);
-			//cout << "Computed initial SDF on subgrid " << i  << "." <<std::endl;
-		}
-		containsCurve = false;
-
-	}
-	}
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-//		cout << "pre 1 kernel" <<std::endl;
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		dim3 threadsPerBlock(this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-//		cout << "post 1 kernel" <<std::endl;
-
-	}
-#endif
-
-
-	this->currentStep = 1;
-	if(this->device == TNL::Devices::HostDevice)
-		synchronize();
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		dim3 threadsPerBlock(this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows);
-		//double * test = (double*)malloc(this->work_u.getSize()*sizeof(double));
-		//cout << test[0] <<"   " << test[1] <<"   " << test[2] <<"   " << test[3] <<std::endl;
-		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//cout << this->tmpw << "   " <<  test[0] <<"   " << test[1] << "   " <<test[2] << "   " <<test[3] <<std::endl;
-
-		TNL_CHECK_CUDA_DEVICE;
-
-		synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		//cout << test[0] << "   " <<test[1] <<"   " << test[2] << "   " <<test[3] <<std::endl;
-		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//TNL_CHECK_CUDA_DEVICE;
-		//cout << this->tmpw << "   " <<  test[0] << "   " <<test[1] << "   " <<test[2] <<"   " << test[3] <<std::endl;
-		//free(test);
-
-	}
-
-#endif
-	cout << "Solver initialized." <<std::endl;
-
-	return true;
-}
-
-/**
- * Runs the iterative solve on the configured device and writes out the result.
- *
- * Host path: repeatedly processes every subgrid whose value is not INT_MAX,
- * re-running it once per flagged side (only when the subgrid value equals
- * currentStep+4) and once per flagged side pair, then clears its boundary
- * condition, decrements its subgrid value and calls synchronize().
- *
- * CUDA path: alternates runCUDA2D with the two synchronisation kernels until
- * the device-side run flag read into run_host stops the loop, then copies the
- * stretched grid back into work_u.
- *
- * Afterwards the stretched grid is contracted into u0, u0 is saved to
- * "u-00001.tnl", calculation statistics are printed and, on CUDA, the device
- * buffers are released.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run()
-{
-	if(this->device == TNL::Devices::HostDevice)
-	{
-		// Boundary-condition codes handed to runSubgrid():
-		// north - 1, east - 2, west - 4, south - 8, and the side pairs built from them.
-		const int singleSides[4] = { 1, 2, 4, 8 };
-		const int sidePairs[4] = { 3, 5, 10, 12 };
-
-		bool end = false;
-		// `end` only becomes true once no boundary condition is pending, so the
-		// loop performs one more pass after the flags first drop to zero.
-		while ((this->boundaryConditions.max() > 0 ) || !end)
-		{
-			end = (this->boundaryConditions.max() == 0);
-#ifdef HAVE_OPENMP
-#pragma omp parallel for num_threads(4) schedule(dynamic)
-#endif
-			for(int i = 0; i < this->subgridValues.getSize(); i++)
-			{
-				// INT_MAX marks a subgrid that has not been reached yet.
-				if(getSubgridValue(i) == INT_MAX)
-					continue;
-
-				VectorType tmp;
-				tmp.setSize(this->n * this->n);
-				const int bc = getBoundaryCondition(i);
-
-				if(getSubgridValue(i) == this->currentStep+4)
-				{
-					// One run per flagged side. Every result goes back into
-					// subgrid i itself (the east-side pass used to run with
-					// code 1 and write into subgrid 2).
-					for(int s = 0; s < 4; s++)
-					{
-						if(bc & singleSides[s])
-						{
-							tmp = getSubgrid(i);
-							tmp = runSubgrid(singleSides[s], tmp ,i);
-							insertSubgrid( tmp, i);
-							this->calculationsCount[i]++;
-						}
-					}
-				}
-
-				// One run per side pair having at least one flagged side
-				// (the north/east pair used to run with code 1 and write
-				// into subgrid 3).
-				for(int p = 0; p < 4; p++)
-				{
-					if(bc & sidePairs[p])
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(sidePairs[p], tmp ,i);
-						insertSubgrid( tmp, i);
-					}
-				}
-
-				setBoundaryCondition(i, 0);
-				setSubgridValue(i, getSubgridValue(i)-1);
-			}
-			synchronize();
-		}
-	}
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		bool end_cuda = false;
-		dim3 threadsPerBlock(this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		// Fetch the device-side "work pending" flag.
-		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		int i = 1;
-		time_diff = 0.0;
-		// NOTE(review): unlike the host loop, end_cuda is set while work is
-		// still pending (run_host != 0). If run_host is 0 on entry and stays 0
-		// this loop does not terminate - confirm the intended condition.
-		while (run_host || !end_cuda)
-		{
-			cout << "Computing at step "<< i++ <<std::endl;
-			end_cuda = (run_host != 0);
-
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			// Only the runCUDA2D kernel is included in the reported solving time.
-			start = std::clock();
-			runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
-
-			synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-
-			cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost);
-		}
-		cout << "Solving time was: " << time_diff <<std::endl;
-
-		// Bring the stretched grid back to the host for contractGrid().
-		cudaMemcpy(this->work_u.getData(), (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-
-		cudaDeviceSynchronize();
-	}
-#endif
-	contractGrid();
-	this->u0.save("u-00001.tnl");
-	cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <<std::endl;
-	cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <<std::endl;
-	cout << "Solver finished" <<std::endl;
-
-#ifdef HAVE_CUDA
-	if(this->device == tnlCudaDevice)
-	{
-		cudaFree(this->runcuda);
-		cudaFree(this->tmpw);
-		cudaFree(this->cudaSolver);
-	}
-#endif
-
-}
-
-//north - 1, east - 2, west - 4, south - 8
-/**
- * Exchanges values across the seams between neighbouring subgrids.
- *
- * On odd steps the seams between vertically adjacent subgrids are processed,
- * on even steps the seams between horizontally adjacent ones. For each pair
- * of cells facing each other across a seam, the value with the smaller
- * magnitude (by more than delta), or the value of the only active subgrid,
- * overwrites the other cell; the receiving subgrid gets the matching
- * boundary-condition bit and, if it was still INT_MAX, is marked -INT_MAX.
- * Finally currentStep is incremented and every subgrid marked -INT_MAX gets
- * the value currentStep + 4.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize()
-{
-	cout << "Synchronizig..." <<std::endl;
-
-	if(this->currentStep & 1)
-	{
-		// Seams between subgrid row j (its last cell row) and row j+1 (its first cell row).
-		for(int j = 0; j < this->gridRows - 1; j++)
-		{
-			for (int i = 0; i < this->gridCols*this->n; i++)
-			{
-				const int cellA = this->gridCols*this->n*((this->n-1)+j*this->n) + i;
-				const int cellB = this->gridCols*this->n*((this->n)+j*this->n) + i;
-				const int ownerA = getOwner(cellA);
-				const int ownerB = getOwner(cellB);
-				const int gridA = getSubgridValue(ownerA);
-				const int gridB = getSubgridValue(ownerB);
-				const bool activeA = (gridA != INT_MAX && gridA != -INT_MAX);
-				const bool activeB = (gridB != INT_MAX && gridB != -INT_MAX);
-				const double absA = fabs(this->work_u[cellA]);
-				const double absB = fabs(this->work_u[cellB]);
-
-				// Both cells in one subgrid would mean the seam indexing is off.
-				if(ownerA == ownerB)
-					cout << "i, j" << i << "," << j <<std::endl;
-
-				if ((absA < absB - this->delta || !activeB) && activeA)
-				{
-					// A -> B: the receiving subgrid is flagged with bit 8.
-					this->work_u[cellB] = this->work_u[cellA];
-					this->unusedCell[cellB] = 0;
-					if(gridB == INT_MAX)
-						setSubgridValue(ownerB, -INT_MAX);
-					const int bcB = getBoundaryCondition(ownerB);
-					if((bcB & 8) == 0)
-						setBoundaryCondition(ownerB, bcB + 8);
-				}
-				else if ((absA > absB + this->delta || !activeA) && activeB)
-				{
-					// B -> A: the receiving subgrid is flagged with bit 1.
-					this->work_u[cellA] = this->work_u[cellB];
-					this->unusedCell[cellA] = 0;
-					if(gridA == INT_MAX)
-						setSubgridValue(ownerA, -INT_MAX);
-					const int bcA = getBoundaryCondition(ownerA);
-					if((bcA & 1) == 0)
-						setBoundaryCondition(ownerA, bcA + 1);
-				}
-			}
-		}
-	}
-	else
-	{
-		// Seams between subgrid column i-1 (its last cell column) and column i (its first cell column).
-		for(int i = 1; i < this->gridCols; i++)
-		{
-			for (int j = 0; j < this->gridRows*this->n; j++)
-			{
-				const int cellA = this->gridCols*this->n*j + i*this->n - 1;
-				const int cellB = this->gridCols*this->n*j + i*this->n ;
-				const int ownerA = getOwner(cellA);
-				const int ownerB = getOwner(cellB);
-				const int gridA = getSubgridValue(ownerA);
-				const int gridB = getSubgridValue(ownerB);
-				const bool activeA = (gridA != INT_MAX && gridA != -INT_MAX);
-				const bool activeB = (gridB != INT_MAX && gridB != -INT_MAX);
-				const double absA = fabs(this->work_u[cellA]);
-				const double absB = fabs(this->work_u[cellB]);
-
-				if(ownerA == ownerB)
-					cout << "i, j" << i << "," << j <<std::endl;
-
-				if ((absA < absB - this->delta || !activeB) && activeA)
-				{
-					// A -> B: the receiving subgrid is flagged with bit 4.
-					this->work_u[cellB] = this->work_u[cellA];
-					this->unusedCell[cellB] = 0;
-					if(gridB == INT_MAX)
-						setSubgridValue(ownerB, -INT_MAX);
-					const int bcB = getBoundaryCondition(ownerB);
-					if((bcB & 4) == 0)
-						setBoundaryCondition(ownerB, bcB + 4);
-				}
-				else if ((absA > absB + this->delta || !activeA) && activeB)
-				{
-					// B -> A: the receiving subgrid is flagged with bit 2.
-					this->work_u[cellA] = this->work_u[cellB];
-					this->unusedCell[cellA] = 0;
-					if(gridA == INT_MAX)
-						setSubgridValue(ownerA, -INT_MAX);
-					const int bcA = getBoundaryCondition(ownerA);
-					if((bcA & 2) == 0)
-						setBoundaryCondition(ownerA, bcA + 2);
-				}
-			}
-		}
-	}
-
-	// Newly reached subgrids (-INT_MAX) are given the value for the next step.
-	this->currentStep++;
-	const int stepValue = this->currentStep + 4;
-	for (int s = 0; s < this->subgridValues.getSize(); s++)
-	{
-		if( getSubgridValue(s) == -INT_MAX )
-			setSubgridValue(s, stepValue);
-	}
-
-	cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl;
-
-}
-
-
-/**
- * Returns the index of the subgrid that contains cell i of the stretched grid.
- * The stretched grid is stored row by row with gridCols*n cells per row.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const
-{
-	const int subgridRow = i / (this->gridCols*this->n*this->n);
-	const int subgridCol = (i % (this->gridCols*this->n)) / this->n;
-	return subgridRow*this->gridCols + subgridCol;
-}
-
-/** Returns the entry of subgridValues stored for subgrid i. */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const
-{
-	const int subgridValue = this->subgridValues[i];
-	return subgridValue;
-}
-
-/** Stores value as the subgridValues entry of subgrid i. */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value)
-{
-	this->subgridValues[ i ] = value;
-	return;
-}
-
-/** Returns the boundary-condition flags stored for subgrid i. */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const
-{
-	const int flags = this->boundaryConditions[i];
-	return flags;
-}
-
-/** Stores value as the boundary-condition flags of subgrid i. */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value)
-{
-	this->boundaryConditions[ i ] = value;
-	return;
-}
-
-/**
- * Builds the stretched working grid work_u from u0.
- *
- * Computes gridCols/gridRows from the mesh dimensions and n, resets the
- * per-subgrid bookkeeping (subgridValues to INT_MAX, boundaryConditions and
- * calculationsCount to 0), allocates work_u and unusedCell for
- * n*n*gridCols*gridRows cells, marks every cell unused and fills work_u[i]
- * from u0[i-k], where k is the index shift computed below.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid()
-{
-	cout << "Stretching grid..." <<std::endl;
-
-	const int meshX = this->mesh.getDimensions().x();
-	const int meshY = this->mesh.getDimensions().y();
-
-	this->gridCols = ceil( ((double)(meshX-1)) / ((double)(this->n-1)) );
-	this->gridRows = ceil( ((double)(meshY-1)) / ((double)(this->n-1)) );
-
-	cout << "Setting gridCols to " << this->gridCols << "." <<std::endl;
-	cout << "Setting gridRows to " << this->gridRows << "." <<std::endl;
-
-	// Per-subgrid bookkeeping: nothing reached yet, no flags, no calculations.
-	const int subgridCount = this->gridCols*this->gridRows;
-	this->subgridValues.setSize(subgridCount);
-	this->subgridValues.setValue(INT_MAX);
-	this->boundaryConditions.setSize(subgridCount);
-	this->boundaryConditions.setValue(0);
-	this->calculationsCount.setSize(subgridCount);
-	this->calculationsCount.setValue(0);
-
-	const int stretchedSize = this->n*this->n*this->gridCols*this->gridRows;
-
-	if(!this->work_u.setSize(stretchedSize))
-		cerr << "Could not allocate memory for stretched grid." <<std::endl;
-	if(!this->unusedCell.setSize(stretchedSize))
-		cerr << "Could not allocate memory for supporting stretched grid." <<std::endl;
-
-	const int idealStretch = meshX + (meshX-2)/(this->n-1);
-	cout << idealStretch <<std::endl;
-
-	// Values that do not change inside the copy loop.
-	const int rowLength = this->n*this->gridCols;
-	const int diff = rowLength - idealStretch;
-
-	for(int i = 0; i < stretchedSize; i++)
-	{
-		this->unusedCell[i] = 1;
-
-		const int row = i/rowLength;
-		const int col = i%rowLength;
-
-		// Shift between the stretched index i and the source index in u0.
-		int k = i/this->n - row + meshX*(i/(this->n*rowLength)) + row*diff;
-
-		// Extra shift for cells beyond idealStretch in the column direction ...
-		const int colOverhang = col - idealStretch;
-		if(colOverhang >= 0)
-			k += colOverhang + 1;
-
-		// ... and in the row direction.
-		const int rowOverhang = row - idealStretch + 1;
-		if(rowOverhang > 0)
-			k += rowOverhang * meshX;
-
-		this->work_u[i] = this->u0[i-k];
-	}
-
-	cout << "Grid stretched." <<std::endl;
-}
-
-/**
- * Copies the stretched working grid work_u back into u0.
- *
- * Uses the same index shift k as stretchGrid(); cells lying at or beyond
- * idealStretch in either direction are skipped.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid()
-{
-	cout << "Contracting grid..." <<std::endl;
-
-	const int meshX = this->mesh.getDimensions().x();
-	const int stretchedSize = this->n*this->n*this->gridCols*this->gridRows;
-	const int idealStretch = meshX + (meshX-2)/(this->n-1);
-	cout << idealStretch <<std::endl;
-
-	const int rowLength = this->n*this->gridCols;
-	const int diff = rowLength - idealStretch;
-
-	for(int i = 0; i < stretchedSize; i++)
-	{
-		const int row = i/rowLength;
-		const int col = i%rowLength;
-
-		// Skip cells past idealStretch in either direction.
-		if(col - idealStretch >= 0)
-			continue;
-		if(row - idealStretch + 1 > 0)
-			continue;
-
-		const int k = i/this->n - row + meshX*(i/(this->n*rowLength)) + row*diff;
-		this->u0[i-k] = this->work_u[i];
-	}
-
-	cout << "Grid contracted" <<std::endl;
-}
-
-/**
- * Returns a copy of subgrid i as a vector of n*n values, row by row,
- * gathered from the stretched grid work_u.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-typename tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType
-tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const
-{
-	VectorType u;
-	u.setSize(this->n*this->n);
-
-	// Index of the subgrid's first cell inside work_u.
-	const int origin = (i / this->gridCols) * this->n*this->n*this->gridCols
-	                   + (i % this->gridCols) * this->n;
-
-	for( int j = 0; j < u.getSize(); j++)
-	{
-		const int localRow = j / this->n;
-		const int localCol = j % this->n;
-		u[j] = this->work_u[ origin + localRow * this->n*this->gridCols + localCol ];
-	}
-	return u;
-}
-
-/**
- * Merges the n*n values u into subgrid i of the stretched grid.
- * A cell is overwritten when it is still marked unused or when the new value
- * has a smaller magnitude; an overwritten cell is marked as used.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i )
-{
-	// Index of the subgrid's first cell inside work_u.
-	const int origin = (i / this->gridCols)*this->n*this->n*this->gridCols
-	                   + (i % this->gridCols)*this->n;
-	const int cellCount = this->n*this->n;
-
-	for( int j = 0; j < cellCount; j++)
-	{
-		const int index = origin
-		                  + (j/this->n)*this->n*this->gridCols
-		                  + (j % this->n);
-
-		const bool smallerMagnitude = fabs(this->work_u[index]) > fabs(u[j]);
-		if( !smallerMagnitude && this->unusedCell[index] != 1 )
-			continue;
-
-		this->work_u[index] = u[j];
-		this->unusedCell[index] = 0;
-	}
-}
-
-/**
- * Integrates one subgrid in pseudo-time with explicit Euler steps and returns
- * the resulting values.
- *
- * Before integrating, the subgrid may be pre-filled: when it contains no sign
- * change, its centre cell in the stretched grid is still marked unused and
- * boundaryCondition is exactly one side code, every cell except those on that
- * side is set to sign(u[0]) * max|u|.
- *
- * @param boundaryCondition  side code (north - 1, east - 2, west - 4, south - 8),
- *                           a sum of side codes, or 0; also forwarded to the scheme
- * @param u                  the n*n subgrid values, row by row (modified copy)
- * @param subGridID          index of the subgrid within the subgrid layout
- * @return the subgrid values after integrating from time 0 to stopTime
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-typename tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType
-tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID)
-{
-
-	VectorType fu;
-
-	fu.setLike(u);
-	fu.setValue( 0.0 );
-
-	// Centre cell of this subgrid in the stretched grid. Subgrids are laid out
-	// with gridCols per row (as in getSubgrid/insertSubgrid/getOwner), so the
-	// row and column must be derived from gridCols, not gridRows.
-	const int subgridRow = subGridID / this->gridCols;
-	const int subgridCol = subGridID % this->gridCols;
-	const int centerGID = (this->n*subgridRow + (this->n >> 1))*(this->n*this->gridCols)
-	                      + this->n*subgridCol + (this->n >> 1);
-
-	// Skip the pre-fill if the subgrid was already computed, if this is a run
-	// without boundary condition, or if the interface crosses the subgrid.
-	bool skipFill = (this->unusedCell[centerGID] == 0 || boundaryCondition == 0);
-	for(int i = 0; i < u.getSize() && !skipFill; i++)
-	{
-		if(u[0]*u[i] <= 0.0)
-			skipFill = true;
-	}
-
-	const double value = sign(u[0]) * u.absMax();
-
-	if(!skipFill)
-	{
-		// Range of cells to overwrite; the row/column on the flagged side is kept.
-		//north - 1, east - 2, west - 4, south - 8
-		int rowBegin = 0, rowEnd = this->n;
-		int colBegin = 0, colEnd = this->n;
-		bool singleSide = true;
-		switch(boundaryCondition)
-		{
-			case 4:  colBegin = 1;           break;
-			case 2:  colEnd = this->n - 1;   break;
-			case 1:  rowEnd = this->n - 1;   break;
-			case 8:  rowBegin = 1;           break;
-			default: singleSide = false;     break;
-		}
-		if(singleSide)
-		{
-			for(int row = rowBegin; row < rowEnd; row++)
-				for(int col = colBegin; col < colEnd; col++)
-					u[row*this->n + col] = value;
-		}
-	}
-
-	double time = 0.0;
-	double currentTau = this->tau0;
-	const double finalTime = this->stopTime;
-	if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
-	double maxResidue( 1.0 );
-	tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	while( time < finalTime )
-	{
-		/****
-		 * Compute the RHS
-		 */
-		for( int i = 0; i < fu.getSize(); i ++ )
-		{
-			const Containers::StaticVector<2,int> coordinates(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x());
-			Entity.setCoordinates(coordinates);
-			Entity.refresh();
-			neighborEntities.refresh(subMesh,Entity.getIndex());
-			fu[ i ] = schemeHost.getValue( this->subMesh, i, coordinates, u, time, boundaryCondition,neighborEntities);
-		}
-		maxResidue = fu. absMax();
-
-		// Time step from the CFL condition, capped by the space step and by
-		// the remaining time.
-		if( this -> cflCondition * maxResidue != 0.0)
-			currentTau =  this -> cflCondition / maxResidue;
-
-		if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >())
-			currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >();
-
-		if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
-		// Explicit Euler update.
-		for( int i = 0; i < fu.getSize(); i ++ )
-			u[ i ] = u[i] + currentTau * fu[ i ];
-
-		time += currentTau;
-	}
-
-	// Return an element-wise copy of the integrated values.
-	VectorType solution;
-	solution.setLike(u);
-	for( int i = 0; i < u.getSize(); i ++ )
-	{
-		solution[i]=u[i];
-	}
-	return solution;
-}
-
-
-#ifdef HAVE_CUDA
-
-
-/**
- * Device helper: reads the cell of caller->work_u_cuda addressed by the
- * current block and thread indices and stores it in *a.
- * The parameter i is not used.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
-{
-	// Offset of this block's first cell plus the thread's offset inside the block.
-	const int blockOrigin = (blockIdx.y) * caller->n*caller->n*caller->gridCols
-	                        + (blockIdx.x) * caller->n;
-	const int cell = blockOrigin
-	                 + threadIdx.y * caller->n*caller->gridCols
-	                 + threadIdx.x;
-	*a = caller->work_u_cuda[cell];
-}
-
-
-/**
- * Device helper: merges *a into the cell of caller->work_u_cuda addressed by
- * the current block and thread indices (overwriting when the cell is marked
- * unused or *a has a smaller magnitude), then reads the cell back into *a.
- * The parameter i is not used.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
-{
-	const int blockOrigin = (blockIdx.y) * caller->n*caller->n*caller->gridCols
-	                        + (blockIdx.x) * caller->n;
-	const int cell = blockOrigin
-	                 + threadIdx.y * caller->n*caller->gridCols
-	                 + threadIdx.x;
-
-	const bool smallerMagnitude = fabs(caller->work_u_cuda[cell]) > fabs(*a);
-	if( smallerMagnitude || caller->unusedCell_cuda[cell] == 1 )
-	{
-		caller->work_u_cuda[cell] = *a;
-		caller->unusedCell_cuda[cell] = 0;
-	}
-
-	// Hand back whatever the cell holds now.
-	*a = caller->work_u_cuda[cell];
-}
-
-
-/**
- * Device helper: merges the value u into the cell of work_u_cuda addressed by
- * the current block and thread indices. The cell is overwritten when it is
- * marked unused or when u has a smaller magnitude, and is then marked used.
- * The parameter i is not used.
- */
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i )
-{
-	const int blockOrigin = (blockIdx.y)*this->n*this->n*this->gridCols
-	                        + (blockIdx.x)*this->n;
-	const int cell = blockOrigin
-	                 + threadIdx.y*this->n*this->gridCols
-	                 + threadIdx.x;
-
-	const bool smallerMagnitude = fabs(this->work_u_cuda[cell]) > fabs(u);
-	if( !smallerMagnitude && this->unusedCell_cuda[cell] != 1 )
-		return;
-
-	this->work_u_cuda[cell] = u;
-	this->unusedCell_cuda[cell] = 0;
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID)
-{
-
-	__shared__ int tmp;
-	__shared__ double value;
-	//double tmpRes = 0.0;
-	volatile double* sharedTau = &u[blockDim.x*blockDim.y];
-	volatile double* absVal = &u[2*blockDim.x*blockDim.y];
-	int i = threadIdx.x;
-	int j = threadIdx.y;
-	int l = threadIdx.y * blockDim.x + threadIdx.x;
-	bool computeFU = !((i == 0 && (boundaryCondition & 4)) or
-			 (i == blockDim.x - 1 && (boundaryCondition & 2)) or
-			 (j == 0 && (boundaryCondition & 8)) or
-			 (j == blockDim.y - 1  && (boundaryCondition & 1)));
-
-	if(l == 0)
-	{
-		tmp = 0;
-		int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1);
-		if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0)
-			tmp = 1;
-	}
-	__syncthreads();
-
-	/*if(!tmp && (u[0]*u[l] <= 0.0))
-		atomicMax( &tmp, 1);*/
-
-	__syncthreads();
-	if(tmp !=1)
-	{
-//		if(computeFU)
-//			absVal[l]=0.0;
-//		else
-//			absVal[l] = fabs(u[l]);
-//
-//		__syncthreads();
-//
-//	      if((blockDim.x == 16) && (l < 128))		absVal[l] = Max(absVal[l],absVal[l+128]);
-//	      __syncthreads();
-//	      if((blockDim.x == 16) && (l < 64))		absVal[l] = Max(absVal[l],absVal[l+64]);
-//	      __syncthreads();
-//	      if(l < 32)    							absVal[l] = Max(absVal[l],absVal[l+32]);
-//	      if(l < 16)								absVal[l] = Max(absVal[l],absVal[l+16]);
-//	      if(l < 8)									absVal[l] = Max(absVal[l],absVal[l+8]);
-//	      if(l < 4)									absVal[l] = Max(absVal[l],absVal[l+4]);
-//	      if(l < 2)									absVal[l] = Max(absVal[l],absVal[l+2]);
-//	      if(l < 1)									value   = sign(u[0])*Max(absVal[l],absVal[l+1]);
-//		__syncthreads();
-//
-//		if(computeFU)
-//			u[l] = value;
-		if(computeFU)
-		{
-			if(boundaryCondition == 4)
-				u[l] = u[threadIdx.y * blockDim.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+  2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n);
-			else if(boundaryCondition == 2)
-				u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n);
-			else if(boundaryCondition == 8)
-				u[l] = u[threadIdx.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n);
-			else if(boundaryCondition == 1)
-				u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y  - 1 +this->n);
-		}
-	}
-
-   double time = 0.0;
-   __shared__ double currentTau;
-   double cfl = this->cflCondition;
-   double fu = 0.0;
-//   if(threadIdx.x * threadIdx.y == 0)
-//   {
-//	   currentTau = finalTime;
-//   }
-   double finalTime = this->stopTime;
-   __syncthreads();
-//   if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
-   tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-   Entity.setCoordinates(Containers::StaticVector<2,int>(i,j));
-   Entity.refresh();
-   neighborEntities.refresh(subMesh,Entity.getIndex());
-
-
-   while( time < finalTime )
-   {
-	  if(computeFU)
-		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighborEntities);
-
-	  sharedTau[l]=abs(cfl/fu);
-
-      if(l == 0)
-      {
-    	  if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >())	sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >();
-      }
-      else if(l == blockDim.x*blockDim.y - 1)
-    	  if( time + sharedTau[l] > finalTime )		sharedTau[l] = finalTime - time;
-
-
-//      if(  (sign(u[l]+sharedTau[l]*fu) != sign(u[l])) && fu != 0.0 && fu != -0.0)
-//    	  {
-//    	  printf("orig: %10f", sharedTau[l]);
-//    	  sharedTau[l]=abs(u[l]/(1.1*fu)) ;
-//    	  printf("   new: %10f\n", sharedTau[l]);
-//    	  }
-
-
-
-      if((blockDim.x == 16) && (l < 128))		sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]);
-      __syncthreads();
-      if((blockDim.x == 16) && (l < 64))		sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]);
-      __syncthreads();
-      if(l < 32)    							sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]);
-      if(l < 16)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]);
-      if(l < 8)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]);
-      if(l < 4)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]);
-      if(l < 2)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]);
-      if(l < 1)									currentTau   = Min(sharedTau[l],sharedTau[l+1]);
-	__syncthreads();
-
-      u[l] += currentTau * fu;
-      time += currentTau;
-   }
-
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const
-{
-
-	return ((i / (this->gridCols*this->n*this->n))*this->gridCols
-			+ (i % (this->gridCols*this->n))/this->n);
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const
-{
-	return this->subgridValues_cuda[i];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value)
-{
-	this->subgridValues_cuda[i] = value;
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const
-{
-	return this->boundaryConditions_cuda[i];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value)
-{
-	this->boundaryConditions_cuda[i] = value;
-}
-
-
-
-//north - 1, east - 2, west - 4, south - 8
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now
-{
-
-	__shared__ int boundary[4]; // north,east,west,south
-	__shared__ int subgridValue;
-	__shared__ int newSubgridValue;
-
-
-	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
-	double u = cudaSolver->work_u_cuda[gid];
-	double u_cmp;
-	int subgridValue_cmp=INT_MAX;
-	int boundary_index=0;
-
-
-	if(threadIdx.x+threadIdx.y == 0)
-	{
-		subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x);
-		boundary[0] = 0;
-		boundary[1] = 0;
-		boundary[2] = 0;
-		boundary[3] = 0;
-		newSubgridValue = 0;
-		//printf("%d   %d\n", blockDim.x, gridDim.x);
-	}
-	__syncthreads();
-
-
-
-	if(		(threadIdx.x == 0 				/*				&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.y == 0 				 	/*			&& (cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.x == blockDim.x - 1 	 /*	&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.y == blockDim.y - 1 	 /*	&& (cudaSolver->currentStep & 1)*/) 		)
-	{
-		if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - 1];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1);
-			boundary_index = 2;
-		}
-
-		if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + 1];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1);
-			boundary_index = 1;
-		}
-
-		__threadfence();
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-			u=u_cmp;
-		}
-		__threadfence();
-		if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x);
-			boundary_index = 3;
-		}
-		if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x);
-			boundary_index = 0;
-		}
-
-//		__threadfence();
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-		}
-	}
-	__threadfence();
-	__syncthreads();
-
-	if(threadIdx.x+threadIdx.y == 0)
-	{
-		if(subgridValue == INT_MAX && newSubgridValue !=0)
-			cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX);
-
-		cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, 	boundary[0] +
-																				2 * boundary[1] +
-																				4 * boundary[2] +
-																				8 * boundary[3]);
-
-
-		if(blockIdx.x+blockIdx.y ==0)
-		{
-			cudaSolver->currentStep = cudaSolver->currentStep + 1;
-			*(cudaSolver->runcuda) = 0;
-		}
-//
-//		int stepValue = cudaSolver->currentStep + 4;
-//		if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
-//				cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue);
-//
-//		atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x));
-	}
-
-
-	/*
-	//printf("I am not an empty kernel!\n");
-	//cout << "Synchronizig..." <<std::endl;
-	int tmp1, tmp2;
-	int grid1, grid2;
-
-	if(cudaSolver->currentStep & 1)
-	{
-		//printf("I am not an empty kernel! 1\n");
-		for(int j = 0; j < cudaSolver->gridRows - 1; j++)
-		{
-			//printf("I am not an empty kernel! 3\n");
-			for (int i = 0; i < cudaSolver->gridCols*cudaSolver->n; i++)
-			{
-				tmp1 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n-1)+j*cudaSolver->n) + i;
-				tmp2 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n)+j*cudaSolver->n) + i;
-				grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1));
-				grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2));
-
-				if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-				{
-					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
-					cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1];
-					cudaSolver->unusedCell_cuda[tmp2] = 0;
-					if(grid2 == INT_MAX)
-					{
-						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX);
-					}
-					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 8) )
-						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+8);
-				}
-				else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-				{
-					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
-					cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2];
-					cudaSolver->unusedCell_cuda[tmp1] = 0;
-					if(grid1 == INT_MAX)
-					{
-						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX);
-					}
-					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 1) )
-						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+1);
-				}
-			}
-		}
-
-	}
-	else
-	{
-		//printf("I am not an empty kernel! 2\n");
-		for(int i = 1; i < cudaSolver->gridCols; i++)
-		{
-			//printf("I am not an empty kernel! 4\n");
-			for (int j = 0; j < cudaSolver->gridRows*cudaSolver->n; j++)
-			{
-
-				tmp1 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n - 1;
-				tmp2 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n ;
-				grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1));
-				grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2));
-
-				if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-				{
-					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
-					cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1];
-					cudaSolver->unusedCell_cuda[tmp2] = 0;
-					if(grid2 == INT_MAX)
-					{
-						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX);
-					}
-					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 4) )
-						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+4);
-				}
-				else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-				{
-					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
-					cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2];
-					cudaSolver->unusedCell_cuda[tmp1] = 0;
-					if(grid1 == INT_MAX)
-					{
-						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX);
-					}
-					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 2) )
-						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+2);
-				}
-			}
-		}
-	}
-	//printf("I am not an empty kernel! 5 cudaSolver->currentStep : %d \n", cudaSolver->currentStep);
-
-	cudaSolver->currentStep = cudaSolver->currentStep + 1;
-	int stepValue = cudaSolver->currentStep + 4;
-	for (int i = 0; i < cudaSolver->gridRows * cudaSolver->gridCols; i++)
-	{
-		if( cudaSolver->getSubgridValueCUDA2D(i) == -INT_MAX )
-			cudaSolver->setSubgridValueCUDA2D(i, stepValue);
-	}
-
-	int maxi = 0;
-	for(int q=0; q < cudaSolver->gridRows*cudaSolver->gridCols;q++)
-	{
-		//printf("%d : %d\n", q, cudaSolver->boundaryConditions_cuda[q]);
-		maxi=Max(maxi,cudaSolver->getBoundaryConditionCUDA2D(q));
-	}
-	//printf("I am not an empty kernel! %d\n", maxi);
-	*(cudaSolver->runcuda) = (maxi > 0);
-	//printf("I am not an empty kernel! 7 %d\n", cudaSolver->boundaryConditions_cuda[0]);
-	//cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl;
-*/
-}
-
-
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void synchronize2CUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver)
-{
-//	if(blockIdx.x+blockIdx.y ==0)
-//	{
-//		cudaSolver->currentStep = cudaSolver->currentStep + 1;
-//		*(cudaSolver->runcuda) = 0;
-//	}
-
-	int stepValue = cudaSolver->currentStep + 4;
-	if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
-			cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue);
-
-	atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x));
-}
-
-
-
-
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA2D( tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3)
-{
-	//cout << "Initializating solver..." <<std::endl;
-	//const String& meshLocation = parameters.getParameter <String>("mesh");
-	//this->mesh_cuda.load( meshLocation );
-
-	//this->n_cuda = parameters.getParameter <int>("subgrid-size");
-	//cout << "Setting N << this->n_cuda <<std::endl;
-
-	//this->subMesh_cuda.setDimensions( this->n_cuda, this->n_cuda );
-	//this->subMesh_cuda.setDomain( Containers::StaticVector<2,double>(0.0, 0.0),
-							 //Containers::StaticVector<2,double>(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n_cuda), this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n_cuda)) );
-
-	//this->subMesh_cuda.save("submesh.tnl");
-
-//	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-//	this->u0.load( initialCondition );
-
-	//cout << this->mesh.getCellCenter(0) <<std::endl;
-
-	//this->delta_cuda = parameters.getParameter <double>("delta");
-	//this->delta_cuda *= this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >();
-
-	//cout << "Setting delta to " << this->delta <<std::endl;
-
-	//this->tau0_cuda = parameters.getParameter <double>("initial-tau");
-	//cout << "Setting initial tau to " << this->tau0_cuda <<std::endl;
-	//this->stopTime_cuda = parameters.getParameter <double>("stop-time");
-
-	//this->cflCondition_cuda = parameters.getParameter <double>("cfl-condition");
-	//this -> cflCondition_cuda *= sqrt(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >());
-	//cout << "Setting CFL to " << this->cflCondition <<std::endl;
-////
-////
-
-//	this->gridRows_cuda = gridRows;
-//	this->gridCols_cuda = gridCols;
-
-	cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double));
-	cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
-	cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
-	cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
-	cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool));
-	*(cudaSolver->runcuda) = 1;
-	cudaSolver->currentStep = 1;
-	//cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost);
-	//ptr = cudaSolver->work_u_cuda;
-	printf("GPU memory allocated.\n");
-
-	for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++)
-	{
-		cudaSolver->subgridValues_cuda[i] = INT_MAX;
-		cudaSolver->boundaryConditions_cuda[i] = 0;
-	}
-
-	/*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++)
-	{
-		printf("%d\n",j);
-		cudaSolver->unusedCell_cuda[ j] = 1;
-	}*/
-	printf("GPU memory initialized.\n");
-
-
-	//cudaSolver->work_u_cuda[50] = 32.153438;
-////
-////
-	//stretchGrid();
-	//this->stopTime_cuda /= (double)(this->gridCols_cuda);
-	//this->stopTime_cuda *= (1.0+1.0/((double)(this->n_cuda) - 1.0));
-	//cout << "Setting stopping time to " << this->stopTime <<std::endl;
-	//this->stopTime_cuda = 1.5*((double)(this->n_cuda))*parameters.getParameter <double>("stop-time")*this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >();
-	//cout << "Setting stopping time to " << this->stopTime <<std::endl;
-
-	//cout << "Initializating scheme..." <<std::endl;
-	//if(!this->schemeDevice.init(parameters))
-//	{
-		//cerr << "Scheme failed to initialize." <<std::endl;
-//		return false;
-//	}
-	//cout << "Scheme initialized." <<std::endl;
-
-	//test();
-
-//	this->currentStep_cuda = 1;
-	//return true;
-}
-
-
-
-
-//extern __shared__ double array[];
-template< typename SchemeHost, typename SchemeDevice, typename Device >
-__global__
-void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initRunCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller)
-
-{
-
-
-	extern __shared__ double u[];
-	//printf("%p\n",caller->work_u_cuda);
-
-	int i = blockIdx.y * gridDim.x + blockIdx.x;
-	int l = threadIdx.y * blockDim.x + threadIdx.x;
-
-	__shared__ int containsCurve;
-	if(l == 0)
-		containsCurve = 0;
-
-	//double a;
-	caller->getSubgridCUDA2D(i,caller, &u[l]);
-	//printf("%f   %f\n",a , u[l]);
-	//u[l] = a;
-	//printf("Hi %f \n", u[l]);
-	__syncthreads();
-	//printf("hurewrwr %f \n", u[l]);
-	if(u[0] * u[l] <= 0.0)
-	{
-		//printf("contains %d \n",i);
-		atomicMax( &containsCurve, 1);
-	}
-
-	__syncthreads();
-	//printf("hu");
-	//printf("%d : %f\n", l, u[l]);
-	if(containsCurve == 1)
-	{
-		//printf("have curve \n");
-		caller->runSubgridCUDA2D(0,u,i);
-		//printf("%d : %f\n", l, u[l]);
-		__syncthreads();
-		caller->insertSubgridCUDA2D(u[l],i);
-		__syncthreads();
-		if(l == 0)
-			caller->setSubgridValueCUDA2D(i, 4);
-	}
-
-
-}
-
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device >
-__global__
-void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/runCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller)
-{
-	extern __shared__ double u[];
-	int i = blockIdx.y * gridDim.x + blockIdx.x;
-	int l = threadIdx.y * blockDim.x + threadIdx.x;
-	int bound = caller->getBoundaryConditionCUDA2D(i);
-
-	if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0)
-	{
-		caller->getSubgridCUDA2D(i,caller, &u[l]);
-
-		//if(l == 0)
-			//printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA2D(i));
-		if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4)
-		{
-			if(bound & 1)
-			{
-				caller->runSubgridCUDA2D(1,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 2 )
-			{
-				caller->runSubgridCUDA2D(2,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 4)
-			{
-				caller->runSubgridCUDA2D(4,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 8)
-			{
-				caller->runSubgridCUDA2D(8,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-
-
-
-
-
-			if( ((bound & 3 )))
-				{
-					caller->runSubgridCUDA2D(3,u,i);
-					//__syncthreads();
-					//caller->insertSubgridCUDA2D(u[l],i);
-					//__syncthreads();
-					//caller->getSubgridCUDA2D(i,caller, &u[l]);
-					caller->updateSubgridCUDA2D(i,caller, &u[l]);
-					__syncthreads();
-				}
-				if( ((bound & 5 )))
-				{
-					caller->runSubgridCUDA2D(5,u,i);
-					//__syncthreads();
-					//caller->insertSubgridCUDA2D(u[l],i);
-					//__syncthreads();
-					//caller->getSubgridCUDA2D(i,caller, &u[l]);
-					caller->updateSubgridCUDA2D(i,caller, &u[l]);
-					__syncthreads();
-				}
-				if( ((bound & 10 )))
-				{
-					caller->runSubgridCUDA2D(10,u,i);
-					//__syncthreads();
-					//caller->insertSubgridCUDA2D(u[l],i);
-					//__syncthreads();
-					//caller->getSubgridCUDA2D(i,caller, &u[l]);
-					caller->updateSubgridCUDA2D(i,caller, &u[l]);
-					__syncthreads();
-				}
-				if(   (bound & 12 ))
-				{
-					caller->runSubgridCUDA2D(12,u,i);
-					//__syncthreads();
-					//caller->insertSubgridCUDA2D(u[l],i);
-					//__syncthreads();
-					//caller->getSubgridCUDA2D(i,caller, &u[l]);
-					caller->updateSubgridCUDA2D(i,caller, &u[l]);
-					__syncthreads();
-				}
-
-
-
-
-
-		}
-
-
-		else
-		{
-
-
-
-
-
-
-
-
-
-			if( ((bound == 2)))
-						{
-							caller->runSubgridCUDA2D(2,u,i);
-							//__syncthreads();
-							//caller->insertSubgridCUDA2D(u[l],i);
-							//__syncthreads();
-							//caller->getSubgridCUDA2D(i,caller, &u[l]);
-							caller->updateSubgridCUDA2D(i,caller, &u[l]);
-							__syncthreads();
-						}
-						if( ((bound == 1) ))
-						{
-							caller->runSubgridCUDA2D(1,u,i);
-							//__syncthreads();
-							//caller->insertSubgridCUDA2D(u[l],i);
-							//__syncthreads();
-							//caller->getSubgridCUDA2D(i,caller, &u[l]);
-							caller->updateSubgridCUDA2D(i,caller, &u[l]);
-							__syncthreads();
-						}
-						if( ((bound == 8) ))
-						{
-							caller->runSubgridCUDA2D(8,u,i);
-							//__syncthreads();
-							//caller->insertSubgridCUDA2D(u[l],i);
-							//__syncthreads();
-							//caller->getSubgridCUDA2D(i,caller, &u[l]);
-							caller->updateSubgridCUDA2D(i,caller, &u[l]);
-							__syncthreads();
-						}
-						if(   (bound == 4))
-						{
-							caller->runSubgridCUDA2D(4,u,i);
-							//__syncthreads();
-							//caller->insertSubgridCUDA2D(u[l],i);
-							//__syncthreads();
-							//caller->getSubgridCUDA2D(i,caller, &u[l]);
-							caller->updateSubgridCUDA2D(i,caller, &u[l]);
-							__syncthreads();
-						}
-
-
-
-
-
-
-
-
-
-
-			if( ((bound & 3) ))
-			{
-				caller->runSubgridCUDA2D(3,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if( ((bound & 5) ))
-			{
-				caller->runSubgridCUDA2D(5,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if( ((bound & 10) ))
-			{
-				caller->runSubgridCUDA2D(10,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(   (bound & 12) )
-			{
-				caller->runSubgridCUDA2D(12,u,i);
-				//__syncthreads();
-				//caller->insertSubgridCUDA2D(u[l],i);
-				//__syncthreads();
-				//caller->getSubgridCUDA2D(i,caller, &u[l]);
-				caller->updateSubgridCUDA2D(i,caller, &u[l]);
-				__syncthreads();
-			}
-
-
-
-
-
-
-
-
-
-
-
-
-		}
-		/*if( bound )
-		{
-			caller->runSubgridCUDA2D(15,u,i);
-			__syncthreads();
-			//caller->insertSubgridCUDA2D(u[l],i);
-			//__syncthreads();
-			//caller->getSubgridCUDA2D(i,caller, &u[l]);
-			caller->updateSubgridCUDA2D(i,caller, &u[l]);
-			__syncthreads();
-		}*/
-
-		if(l==0)
-		{
-			caller->setBoundaryConditionCUDA2D(i, 0);
-			caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 );
-		}
-
-
-	}
-
-
-
-}
-
-#endif /*HAVE_CUDA*/
-
-#endif /* TNLPARALLELEIKONALSOLVER2D_IMPL_H_ */
diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
deleted file mode 100644
index dc3fd54679bc5c5b6be44841e56e1c32167b5226..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
+++ /dev/null
@@ -1,1706 +0,0 @@
-/***************************************************************************
-                          tnlParallelEikonalSolver2D_impl.h  -  description
-                             -------------------
-    begin                : Nov 28 , 2014
-    copyright            : (C) 2014 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef TNLPARALLELEIKONALSOLVER3D_IMPL_H_
-#define TNLPARALLELEIKONALSOLVER3D_IMPL_H_
-
-
-#include "tnlParallelEikonalSolver.h"
-#include <core/mfilename.h>
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver()
-{
-	cout << "a" <<std::endl;
-	this->device = TNL::Devices::HostDevice;  /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice   ---    vypocet na CPU
-
-#ifdef HAVE_CUDA
-	if(this->device == tnlCudaDevice)
-	{
-	run_host = 1;
-	}
-#endif
-
-	cout << "b" <<std::endl;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::test()
-{
-/*
-	for(int i =0; i < this->subgridValues.getSize(); i++ )
-	{
-		insertSubgrid(getSubgrid(i), i);
-	}
-*/
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-
-bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters )
-{
-	cout << "Initializating solver..." <<std::endl;
-	const String& meshLocation = parameters.getParameter <String>("mesh");
-	this->mesh.load( meshLocation );
-
-	this->n = parameters.getParameter <int>("subgrid-size");
-	cout << "Setting N to " << this->n <<std::endl;
-
-	this->subMesh.setDimensions( this->n, this->n, this->n );
-	this->subMesh.setDomain( Containers::StaticVector<3,double>(0.0, 0.0, 0.0),
-							 Containers::StaticVector<3,double>(mesh.template getSpaceStepsProducts< 1, 0, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1, 0 >()*(double)(this->n),mesh.template getSpaceStepsProducts< 0, 0, 1 >()*(double)(this->n)) );
-
-	this->subMesh.save("submesh.tnl");
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	this->u0.load( initialCondition );
-
-	//cout << this->mesh.getCellCenter(0) <<std::endl;
-
-	this->delta = parameters.getParameter <double>("delta");
-	this->delta *= mesh.template getSpaceStepsProducts< 1, 0, 0 >()*mesh.template getSpaceStepsProducts< 0, 1, 0 >();
-
-	cout << "Setting delta to " << this->delta <<std::endl;
-
-	this->tau0 = parameters.getParameter <double>("initial-tau");
-	cout << "Setting initial tau to " << this->tau0 <<std::endl;
-	this->stopTime = parameters.getParameter <double>("stop-time");
-
-	this->cflCondition = parameters.getParameter <double>("cfl-condition");
-	this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0, 0 >()*mesh.template getSpaceStepsProducts< 0, 1, 0 >());
-	cout << "Setting CFL to " << this->cflCondition <<std::endl;
-
-	stretchGrid();
-	this->stopTime /= (double)(this->gridCols);
-	this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0));
-	cout << "Setting stopping time to " << this->stopTime <<std::endl;
-	//this->stopTime = 1.5*((double)(this->n))*parameters.getParameter <double>("stop-time")*mesh.template getSpaceStepsProducts< 1, 0, 0 >();
-	//cout << "Setting stopping time to " << this->stopTime <<std::endl;
-
-	cout << "Initializating scheme..." <<std::endl;
-	if(!this->schemeHost.init(parameters))
-	{
-		cerr << "SchemeHost failed to initialize." <<std::endl;
-		return false;
-	}
-	cout << "Scheme initialized." <<std::endl;
-
-	test();
-
-	VectorType* tmp = new VectorType[subgridValues.getSize()];
-
-
-#ifdef HAVE_CUDA
-
-	if(this->device == tnlCudaDevice)
-	{
-	/*cout << "Testing... " <<std::endl;
-	if(this->device == tnlCudaDevice)
-	{
-	if( !initCUDA3D(parameters, gridRows, gridCols) )
-		return false;
-	}*/
-		//cout << "s" <<std::endl;
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >));
-	//cout << "s" <<std::endl;
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice);
-	//cout << "s" <<std::endl;
-	double** tmpdev = NULL;
-	cudaMalloc(&tmpdev, sizeof(double*));
-	//double* tmpw;
-	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
-	cudaMalloc(&(this->runcuda), sizeof(int));
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	int* tmpUC;
-	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
-	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
-
-	initCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	//cout << "s " <<std::endl;
-	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
-	double* tmpu = NULL;
-
-	cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
-	//printf("%p %p \n",tmpu,tmpw);
-	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	//cout << "s "<<std::endl;
-
-	}
-#endif
-
-	if(this->device == TNL::Devices::HostDevice)
-	{
-#ifdef HAVE_OPENMP
-#pragma omp parallel for num_threads(4) schedule(dynamic)
-#endif
-		for(int i = 0; i < this->subgridValues.getSize(); i++)
-		{
-			bool containsCurve = false;
-//			cout << "Working on subgrid " << i <<" --- check 1" <<std::endl;
-
-			if(! tmp[i].setSize(this->n*this->n*this->n))
-				cout << "Could not allocate tmp["<< i <<"] array." <<std::endl;
-//			cout << "Working on subgrid " << i <<" --- check 2" <<std::endl;
-
-			tmp[i] = getSubgrid(i);
-			containsCurve = false;
-//			cout << "Working on subgrid " << i <<" --- check 3" <<std::endl;
-
-
-			for(int j = 0; j < tmp[i].getSize(); j++)
-			{
-				if(tmp[i][0]*tmp[i][j] <= 0.0)
-				{
-					containsCurve = true;
-					j=tmp[i].getSize();
-//					cout << tmp[i][0] << " " << tmp[i][j] <<std::endl;
-				}
-
-			}
-//			cout << "Working on subgrid " << i <<" --- check 4" <<std::endl;
-
-			if(containsCurve)
-			{
-//				cout << "Computing initial SDF on subgrid " << i << "." <<std::endl;
-				tmp[i] = runSubgrid(0, tmp[i] ,i);
-				insertSubgrid( tmp[i], i);
-				setSubgridValue(i, 4);
-//				cout << "Computed initial SDF on subgrid " << i  << "." <<std::endl;
-			}
-			containsCurve = false;
-
-		}
-//		cout << "CPU: Curve found" <<std::endl;
-	}
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-//		cout << "pre 1 kernel" <<std::endl;
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		dim3 threadsPerBlock(this->n, this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-//		cout << "post 1 kernel" <<std::endl;
-
-	}
-#endif
-
-
-	this->currentStep = 1;
-	if(this->device == TNL::Devices::HostDevice)
-		synchronize();
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		dim3 threadsPerBlock(this->n, this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels);
-		//double * test = (double*)malloc(this->work_u.getSize()*sizeof(double));
-		//cout << test[0] <<"   " << test[1] <<"   " << test[2] <<"   " << test[3] <<std::endl;
-		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//cout << this->tmpw << "   " <<  test[0] <<"   " << test[1] << "   " <<test[2] << "   " <<test[3] <<std::endl;
-
-		TNL_CHECK_CUDA_DEVICE;
-
-		synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-		cout << cudaGetErrorString(cudaDeviceSynchronize()) <<std::endl;
-		TNL_CHECK_CUDA_DEVICE;
-		synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		//cout << test[0] << "   " <<test[1] <<"   " << test[2] << "   " <<test[3] <<std::endl;
-		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//TNL_CHECK_CUDA_DEVICE;
-		//cout << this->tmpw << "   " <<  test[0] << "   " <<test[1] << "   " <<test[2] <<"   " << test[3] <<std::endl;
-		//free(test);
-
-	}
-
-#endif
-	cout << "Solver initialized." <<std::endl;
-
-	return true;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::run()
-{
-	if(this->device == TNL::Devices::HostDevice)
-	{
-
-	bool end = false;
-	while (/*(this->boundaryConditions.max() > 0 ) ||*/ !end)
-	{
-		if(this->boundaryConditions.max() == 0 || this->subgridValues.max() < 0)
-			end=true;
-		else
-			end=false;
-#ifdef HAVE_OPENMP
-#pragma omp parallel for num_threads(4) schedule(dynamic)
-#endif
-		for(int i = 0; i < this->subgridValues.getSize(); i++)
-		{
-			VectorType tmp;
-			tmp.setSize(this->n*this->n*this->n);
-			if(getSubgridValue(i) != INT_MAX)
-			{
-				//cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) <<std::endl;
-
-				if(getSubgridValue(i) == currentStep+4)
-				{
-
-					if(getBoundaryCondition(i) & 1)
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(1, tmp ,i);
-						insertSubgrid( tmp, i);
-						this->calculationsCount[i]++;
-					}
-					if(getBoundaryCondition(i) & 2)
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(2, tmp ,i);
-						insertSubgrid( tmp, i);
-						this->calculationsCount[i]++;
-					}
-					if(getBoundaryCondition(i) & 4)
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(4, tmp ,i);
-						insertSubgrid( tmp, i);
-						this->calculationsCount[i]++;
-					}
-					if(getBoundaryCondition(i) & 8)
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(8, tmp ,i);
-						insertSubgrid( tmp, i);
-						this->calculationsCount[i]++;
-					}
-					if(getBoundaryCondition(i) & 16)
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(16, tmp ,i);
-						insertSubgrid( tmp, i);
-						this->calculationsCount[i]++;
-					}
-					if(getBoundaryCondition(i) & 32)
-					{
-						tmp = getSubgrid(i);
-						tmp = runSubgrid(32, tmp ,i);
-						insertSubgrid( tmp, i);
-						this->calculationsCount[i]++;
-					}
-				}
-
-				if( getBoundaryCondition(i) & 19)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(19, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-				if( getBoundaryCondition(i) & 21)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(21, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-				if( getBoundaryCondition(i) & 26)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(26, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-				if( getBoundaryCondition(i) & 28)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(28, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-
-				if( getBoundaryCondition(i) & 35)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(35, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-				if( getBoundaryCondition(i) & 37)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(37, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-				if( getBoundaryCondition(i) & 42)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(42, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-				if( getBoundaryCondition(i) & 44)
-				{
-					tmp = getSubgrid(i);
-					tmp = runSubgrid(44, tmp ,i);
-					insertSubgrid( tmp, i);
-				}
-
-
-				setBoundaryCondition(i, 0);
-				setSubgridValue(i, getSubgridValue(i)-1);
-
-			}
-		}
-		synchronize();
-	}
-	}
-#ifdef HAVE_CUDA
-	else if(this->device == tnlCudaDevice)
-	{
-		//cout << "fn" <<std::endl;
-		bool end_cuda = false;
-		dim3 threadsPerBlock(this->n, this->n, this->n);
-		dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		//cudaMalloc(&runcuda,sizeof(bool));
-		//cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice);
-		//cout << "fn" <<std::endl;
-		bool* tmpb;
-		//cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost);
-		//cudaDeviceSynchronize();
-		//TNL_CHECK_CUDA_DEVICE;
-		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		//cout << "fn" <<std::endl;
-		int i = 1;
-		time_diff = 0.0;
-		while (run_host || !end_cuda)
-		{
-			cout << "Computing at step "<< i++ <<std::endl;
-			if(run_host != 0 )
-				end_cuda = true;
-			else
-				end_cuda = false;
-			//cout << "a" <<std::endl;
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			start = std::clock();
-			runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
-			//cout << "a" <<std::endl;
-			cudaDeviceSynchronize();
-			time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
-
-			//start = std::clock();
-			synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			//time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
-
-
-			//cout << "a" <<std::endl;
-			//run_host = false;
-			//cout << "in kernel loop" << run_host <<std::endl;
-			//cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost);
-			cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost);
-			//cout << "in kernel loop" << run_host <<std::endl;
-		}
-		cout << "Solving time was: " << time_diff <<std::endl;
-		//cout << "b" <<std::endl;
-
-		//double* tmpu;
-		//cudaMemcpy(tmpu, &(cudaSolver->work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice);
-		//cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//cout << this->work_u.getData()[0] <<std::endl;
-
-		//double * test = (double*)malloc(this->work_u.getSize()*sizeof(double));
-		//cout << test[0] << test[1] << test[2] << test[3] <<std::endl;
-		cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-		//cout << this->tmpw << "   " <<  test[0] << test[1] << test[2] << test[3] <<std::endl;
-		//free(test);
-
-		cudaDeviceSynchronize();
-	}
-#endif
-	contractGrid();
-	this->u0.save("u-00001.tnl");
-	cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <<std::endl;
-	cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <<std::endl;
-	cout << "Solver finished" <<std::endl;
-
-#ifdef HAVE_CUDA
-	if(this->device == tnlCudaDevice)
-	{
-		cudaFree(this->runcuda);
-		cudaFree(this->tmpw);
-		cudaFree(this->cudaSolver);
-	}
-#endif
-
-}
-
-//north - 1, east - 2, west - 4, south - 8
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now
-{
-	cout << "Synchronizig..." <<std::endl;
-	int tmp1, tmp2;
-	int grid1, grid2;
-
-//	if(this->currentStep & 1)
-//	{
-		for(int j = 0; j < this->gridRows - 1; j++)
-		{
-			for (int i = 0; i < this->gridCols*this->n; i++)
-			{
-				for (int k = 0; k < this->gridLevels*this->n; k++)
-				{
-//					cout << "a" <<std::endl;
-					tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i + k*this->gridCols*this->n*this->gridRows*this->n;
-//					cout << "b" <<std::endl;
-					tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i + k*this->gridCols*this->n*this->gridRows*this->n;
-//					cout << "c" <<std::endl;
-					if(tmp1 > work_u.getSize())
-						cout << "tmp1: " << tmp1 << " x: " << j <<" y: " << i <<" z: " << k <<std::endl;
-					if(tmp2 > work_u.getSize())
-						cout << "tmp2: " << tmp2 << " x: " << j <<" y: " << i <<" z: " << k <<std::endl;
-					grid1 = getSubgridValue(getOwner(tmp1));
-//					cout << "d" <<std::endl;
-					grid2 = getSubgridValue(getOwner(tmp2));
-//					cout << "e" <<std::endl;
-					if(getOwner(tmp1)==getOwner(tmp2))
-						cout << "i, j, k" << i << "," << j << "," << k <<std::endl;
-					if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-					{
-						this->work_u[tmp2] = this->work_u[tmp1];
-//						cout << "f" <<std::endl;
-						this->unusedCell[tmp2] = 0;
-//						cout << "g" <<std::endl;
-						if(grid2 == INT_MAX)
-						{
-							setSubgridValue(getOwner(tmp2), -INT_MAX);
-						}
-//						cout << "h" <<std::endl;
-						if(! (getBoundaryCondition(getOwner(tmp2)) & 8) )
-							setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8);
-//						cout << "i" <<std::endl;
-					}
-					else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-					{
-						this->work_u[tmp1] = this->work_u[tmp2];
-//						cout << "j" <<std::endl;
-						this->unusedCell[tmp1] = 0;
-//						cout << "k" <<std::endl;
-						if(grid1 == INT_MAX)
-						{
-							setSubgridValue(getOwner(tmp1), -INT_MAX);
-						}
-//						cout << "l" <<std::endl;
-						if(! (getBoundaryCondition(getOwner(tmp1)) & 1) )
-							setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1);
-//						cout << "m" <<std::endl;
-					}
-				}
-			}
-		}
-
-//	}
-//	else
-//	{
-
-		cout << "sync 2" <<std::endl;
-		for(int i = 1; i < this->gridCols; i++)
-		{
-			for (int j = 0; j < this->gridRows*this->n; j++)
-			{
-				for (int k = 0; k < this->gridLevels*this->n; k++)
-				{
-					tmp1 = this->gridCols*this->n*j + i*this->n - 1 + k*this->gridCols*this->n*this->gridRows*this->n;
-					tmp2 = this->gridCols*this->n*j + i*this->n + k*this->gridCols*this->n*this->gridRows*this->n;
-					grid1 = getSubgridValue(getOwner(tmp1));
-					grid2 = getSubgridValue(getOwner(tmp2));
-					if(getOwner(tmp1)==getOwner(tmp2))
-						cout << "i, j, k" << i << "," << j << "," << k <<std::endl;
-					if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-					{
-						this->work_u[tmp2] = this->work_u[tmp1];
-						this->unusedCell[tmp2] = 0;
-						if(grid2 == INT_MAX)
-						{
-							setSubgridValue(getOwner(tmp2), -INT_MAX);
-						}
-						if(! (getBoundaryCondition(getOwner(tmp2)) & 4) )
-							setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4);
-					}
-					else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-					{
-						this->work_u[tmp1] = this->work_u[tmp2];
-						this->unusedCell[tmp1] = 0;
-						if(grid1 == INT_MAX)
-						{
-							setSubgridValue(getOwner(tmp1), -INT_MAX);
-						}
-						if(! (getBoundaryCondition(getOwner(tmp1)) & 2) )
-							setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2);
-					}
-				}
-			}
-		}
-
-		cout << "sync 3" <<std::endl;
-
-		for(int k = 1; k < this->gridLevels; k++)
-		{
-			for (int j = 0; j < this->gridRows*this->n; j++)
-			{
-				for (int i = 0; i < this->gridCols*this->n; i++)
-				{
-					tmp1 = this->gridCols*this->n*j + i + (k*this->n-1)*this->gridCols*this->n*this->gridRows*this->n;
-					tmp2 = this->gridCols*this->n*j + i + k*this->n*this->gridCols*this->n*this->gridRows*this->n;
-					grid1 = getSubgridValue(getOwner(tmp1));
-					grid2 = getSubgridValue(getOwner(tmp2));
-					if(getOwner(tmp1)==getOwner(tmp2))
-						cout << "i, j, k" << i << "," << j << "," << k <<std::endl;
-					if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
-					{
-						this->work_u[tmp2] = this->work_u[tmp1];
-						this->unusedCell[tmp2] = 0;
-						if(grid2 == INT_MAX)
-						{
-							setSubgridValue(getOwner(tmp2), -INT_MAX);
-						}
-						if(! (getBoundaryCondition(getOwner(tmp2)) & 32) )
-							setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+32);
-					}
-					else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
-					{
-						this->work_u[tmp1] = this->work_u[tmp2];
-						this->unusedCell[tmp1] = 0;
-						if(grid1 == INT_MAX)
-						{
-							setSubgridValue(getOwner(tmp1), -INT_MAX);
-						}
-						if(! (getBoundaryCondition(getOwner(tmp1)) & 16) )
-							setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+16);
-					}
-				}
-			}
-		}
-//		}
-
-
-
-	this->currentStep++;
-	int stepValue = this->currentStep + 4;
-	for (int i = 0; i < this->subgridValues.getSize(); i++)
-	{
-		if( getSubgridValue(i) == -INT_MAX )
-			setSubgridValue(i, stepValue);
-	}
-
-	cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl;
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const
-{
-
-	int j = i % (this->gridCols*this->gridRows*this->n*this->n);
-
-	return ( (i / (this->gridCols*this->gridRows*this->n*this->n*this->n))*this->gridCols*this->gridRows
-			+ (j / (this->gridCols*this->n*this->n))*this->gridCols
-			+ (j % (this->gridCols*this->n))/this->n);
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const
-{
-	return this->subgridValues[i];
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value)
-{
-	this->subgridValues[i] = value;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const
-{
-	return this->boundaryConditions[i];
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value)
-{
-	this->boundaryConditions[i] = value;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid()
-{
-	cout << "Stretching grid..." <<std::endl;
-
-
-	this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) );
-	this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) );
-	this->gridLevels = ceil( ((double)(this->mesh.getDimensions().z()-1)) / ((double)(this->n-1)) );
-
-	//this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ;
-	//this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ;
-
-	cout << "Setting gridCols to " << this->gridCols << "." <<std::endl;
-	cout << "Setting gridRows to " << this->gridRows << "." <<std::endl;
-	cout << "Setting gridLevels to " << this->gridLevels << "." <<std::endl;
-
-	this->subgridValues.setSize(this->gridCols*this->gridRows*this->gridLevels);
-	this->subgridValues.setValue(0);
-	this->boundaryConditions.setSize(this->gridCols*this->gridRows*this->gridLevels);
-	this->boundaryConditions.setValue(0);
-	this->calculationsCount.setSize(this->gridCols*this->gridRows*this->gridLevels);
-	this->calculationsCount.setValue(0);
-
-	for(int i = 0; i < this->subgridValues.getSize(); i++ )
-	{
-		this->subgridValues[i] = INT_MAX;
-		this->boundaryConditions[i] = 0;
-	}
-
-	int levelSize = this->n*this->n*this->gridCols*this->gridRows;
-	int stretchedSize = this->n*levelSize*this->gridLevels;
-
-	if(!this->work_u.setSize(stretchedSize))
-		cerr << "Could not allocate memory for stretched grid." <<std::endl;
-	if(!this->unusedCell.setSize(stretchedSize))
-		cerr << "Could not allocate memory for supporting stretched grid." <<std::endl;
-	int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1);
-	cout << idealStretch <<std::endl;
-
-
-
-
-	for(int i = 0; i < levelSize; i++)
-	{
-		int diff =(this->n*this->gridCols) - idealStretch ;
-
-		int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff;
-
-		if(i%(this->n*this->gridCols) - idealStretch  >= 0)
-		{
-			k+= i%(this->n*this->gridCols) - idealStretch +1 ;
-		}
-
-		if(i/(this->n*this->gridCols) - idealStretch + 1  > 0)
-		{
-			k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ;
-		}
-
-		for( int j = 0; j<this->n*this->gridLevels; j++)
-		{
-			this->unusedCell[i+j*levelSize] = 1;
-			int l = j/this->n;
-
-			if(j - idealStretch  >= 0)
-			{
-				l+= j - idealStretch + 1;
-			}
-
-			this->work_u[i+j*levelSize] = this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k];
-		}
-
-	}
-
-
-
-	cout << "Grid stretched." <<std::endl;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::contractGrid()
-{
-	cout << "Contracting grid..." <<std::endl;
-	int levelSize = this->n*this->n*this->gridCols*this->gridRows;
-	int stretchedSize = this->n*levelSize*this->gridLevels;
-
-	int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1);
-	cout << idealStretch <<std::endl;
-
-
-	for(int i = 0; i < levelSize; i++)
-	{
-		int diff =(this->n*this->gridCols) - idealStretch ;
-		int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff;
-
-		if((i%(this->n*this->gridCols) - idealStretch  < 0) && (i/(this->n*this->gridCols) - idealStretch + 1  <= 0) )
-		{
-			for( int j = 0; j<this->n*this->gridLevels; j++)
-			{
-				int l = j/this->n;
-				if(j - idealStretch  < 0)
-					this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+j*levelSize];
-			}
-		}
-
-	}
-
-	cout << "Grid contracted" <<std::endl;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-typename tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::VectorType
-tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const
-{
-
-	VectorType u;
-	u.setSize(this->n*this->n*this->n);
-
-	int idx, idy, idz;
-	idz = i / (gridRows*this->gridCols);
-	idy = (i % (this->gridRows*this->gridCols)) / this->gridCols;
-	idx = i %  (this->gridCols);
-
-	for( int j = 0; j < this->n; j++)
-	{
-	//	int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n);
-		for( int k = 0; k < this->n; k++)
-		{
-			for( int l = 0; l < this->n; l++)
-			{
-				int index = (idz*this->n + l) * this->n*this->n*this->gridCols*this->gridRows
-						 + (idy) * this->n*this->n*this->gridCols
-						 + (idx) * this->n
-						 + k * this->n*this->gridCols
-						 + j;
-
-				u[j + k*this->n  + l*this->n*this->n] = this->work_u[ index ];
-			}
-		}
-	}
-	return u;
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i )
-{
-	int idx, idy, idz;
-	idz = i / (this->gridRows*this->gridCols);
-	idy = (i % (this->gridRows*this->gridCols)) / this->gridCols;
-	idx = i %  (this->gridCols);
-
-	for( int j = 0; j < this->n; j++)
-	{
-	//	int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n);
-		for( int k = 0; k < this->n; k++)
-		{
-			for( int l = 0; l < this->n; l++)
-			{
-
-				int index = (idz*this->n + l) * this->n*this->n*this->gridCols*this->gridRows
-						 + (idy) * this->n*this->n*this->gridCols
-						 + (idx) * this->n
-						 + k * this->n*this->gridCols
-						 + j;
-
-				//OMP LOCK index
-//				cout<< idx << " " << idy << " " << idz << " " << j << " " << k << " " << l << " " << idz << " " << unusedCell.getSize() << " " << u.getSize() << " " << index <<endl;
-				if( (fabs(this->work_u[index]) > fabs(u[j + k*this->n  + l*this->n*this->n])) || (this->unusedCell[index] == 1) )
-				{
-					this->work_u[index] = u[j + k*this->n  + l*this->n*this->n];
-					this->unusedCell[index] = 0;
-				}
-				//OMP UNLOCK index
-			}
-		}
-	}
-}
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-typename tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::VectorType
-tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID)
-{
-
-	VectorType fu;
-
-	fu.setLike(u);
-	fu.setValue( 0.0 );
-
-
-	bool tmp = false;
-	for(int i = 0; i < u.getSize(); i++)
-	{
-		if(u[0]*u[i] <= 0.0)
-			tmp=true;
-	}
-	int idx,idy,idz;
-	idz = subGridID / (this->gridRows*this->gridCols);
-	idy = (subGridID % (this->gridRows*this->gridCols)) / this->gridCols;
-	idx = subGridID %  (this->gridCols);
-	int centerGID = (this->n*idy + (this->n>>1) )*(this->n*this->gridCols) + this->n*idx + (this->n>>1)
-			      + ((this->n>>1)+this->n*idz)*this->n*this->n*this->gridRows*this->gridCols;
-	if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0)
-		tmp = true;
-	//if(this->currentStep + 3 < getSubgridValue(subGridID))
-		//tmp = true;
-
-
-	double value = sign(u[0]) * u.absMax();
-
-	if(tmp)
-	{}
-
-
-	//north - 1, east - 2, west - 4, south - 8
-	else if(boundaryCondition == 4)
-	{
-		for(int i = 0; i < this->n; i++)
-			for(int j = 1;j < this->n; j++)
-				for(int k = 0;k < this->n; k++)
-				//if(fabs(u[i*this->n + j]) <  fabs(u[i*this->n]))
-				u[k*this->n*this->n + i*this->n + j] = value;// u[i*this->n];
-	}
-	else if(boundaryCondition == 2)
-	{
-		for(int i = 0; i < this->n; i++)
-			for(int j =0 ;j < this->n -1; j++)
-				for(int k = 0;k < this->n; k++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1]))
-				u[k*this->n*this->n + i*this->n + j] = value;// u[(i+1)*this->n - 1];
-	}
-	else if(boundaryCondition == 1)
-	{
-		for(int j = 0; j < this->n; j++)
-			for(int i = 0;i < this->n - 1; i++)
-				for(int k = 0;k < this->n; k++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)]))
-				u[k*this->n*this->n + i*this->n + j] = value;// u[j + this->n*(this->n - 1)];
-	}
-	else if(boundaryCondition == 8)
-	{
-		for(int j = 0; j < this->n; j++)
-			for(int i = 1;i < this->n; i++)
-				for(int k = 0;k < this->n; k++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[j]))
-				u[k*this->n*this->n + i*this->n + j] = value;// u[j];
-	}
-	else if(boundaryCondition == 16)
-	{
-		for(int j = 0; j < this->n; j++)
-			for(int i = 0;i < this->n ; i++)
-				for(int k = 0;k < this->n-1; k++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)]))
-				u[k*this->n*this->n + i*this->n + j] = value;// u[j + this->n*(this->n - 1)];
-	}
-	else if(boundaryCondition == 32)
-	{
-		for(int j = 0; j < this->n; j++)
-			for(int i = 0;i < this->n; i++)
-				for(int k = 1;k < this->n; k++)
-				//if(fabs(u[i*this->n + j]) < fabs(u[j]))
-				u[k*this->n*this->n + i*this->n + j] = value;// u[j];
-	}
-
-
-   double time = 0.0;
-   double currentTau = this->tau0;
-   double finalTime = this->stopTime;// + 3.0*(u.max() - u.min());
-   if(boundaryCondition == 0) finalTime *= 2.0;
-   if( time + currentTau > finalTime ) currentTau = finalTime - time;
-
-   double maxResidue( 1.0 );
-   //double lastResidue( 10000.0 );
-   tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
-   while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*/)
-   {
-      /****
-       * Compute the RHS
-       */
-
-      for( int i = 0; i < fu.getSize(); i ++ )
-      {
-//    	 std::cout << "i: " << i << ", time: " << time <<endl;
-    	  Containers::StaticVector<3,int> coords(i % subMesh.getDimensions().x(),
-    	  								(i % (subMesh.getDimensions().x()*subMesh.getDimensions().y())) / subMesh.getDimensions().x(),
-    	  								i / (subMesh.getDimensions().x()*subMesh.getDimensions().y()));
-//    	  	cout << "b " << i << " " << i % subMesh.getDimensions().x() << " " << (i % (subMesh.getDimensions().x()*subMesh.getDimensions().y())) << " " << (i % subMesh.getDimensions().x()*subMesh.getDimensions().y()) / subMesh.getDimensions().x() << " " << subMesh.getDimensions().x()*subMesh.getDimensions().y() << " " <<endl;
-			Entity.setCoordinates(coords);
-//			cout <<"c" << coords <<std::endl;
-			Entity.refresh();
-//			cout << "d" <<endl;
-			neighborEntities.refresh(subMesh,Entity.getIndex());
-//			cout << "e" <<endl;
-    	  fu[ i ] = schemeHost.getValue( this->subMesh, i, coords,u, time, boundaryCondition, neighborEntities );
-//    	 std::cout << "f" <<endl;
-      }
-      maxResidue = fu. absMax();
-
-
-      if( this -> cflCondition * maxResidue != 0.0)
-    	  currentTau =  this -> cflCondition / maxResidue;
-
-     /* if (maxResidue < 0.05)
-    	 std::cout << "Max < 0.05" <<std::endl;*/
-      if(currentTau > 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >())
-    	  currentTau = 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >();
-      /*if(maxResidue > lastResidue)
-    	  currentTau *=(1.0/10.0);*/
-
-
-      if( time + currentTau > finalTime ) currentTau = finalTime - time;
-//      for( int i = 0; i < fu.getSize(); i ++ )
-//      {
-//    	  //cout << "Too big RHS! i = " << i << ", fu = " << fu[i] << ", u = " << u[i] <<std::endl;
-//    	  if((u[i]+currentTau * fu[ i ])*u[i] < 0.0 && fu[i] != 0.0 && u[i] != 0.0 )
-//    		  currentTau = fabs(u[i]/(2.0*fu[i]));
-//
-//      }
-
-
-      for( int i = 0; i < fu.getSize(); i ++ )
-      {
-    	  double add = u[i] + currentTau * fu[ i ];
-    	  //if( fabs(u[i]) < fabs(add) or (this->subgridValues[subGridID] == this->currentStep +4) )
-    		  u[ i ] = add;
-      }
-      time += currentTau;
-
-      //cout << '\r' << flush;
-     //cout << maxResidue << "   " << currentTau << " @ " << time << flush;
-     //lastResidue = maxResidue;
-   }
-   //cout << "Time: " << time << ", Res: " << maxResidue <<endl;
-	/*if (u.max() > 0.0)
-		this->stopTime /=(double) this->gridCols;*/
-
-//	VectorType solution;
-//	solution.setLike(u);
-//    for( int i = 0; i < u.getSize(); i ++ )
-//  	{
-//    	solution[i]=u[i];
-//   	}
-//	return solution;
-	return u;
-}
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
-{
-	//int j = threadIdx.x + threadIdx.y * blockDim.x;
-//	int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows
-//			 + (blockIdx.y) * this->n*this->n*this->gridCols
-//             + (blockIdx.x) * this->n
-//             + threadIdx.y * this->n*this->gridCols
-//             + threadIdx.x;
-
-
-	int index =  blockDim.x*blockIdx.x + threadIdx.x +
-			  (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x +
-			  (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y;
-
-	//printf("i= %d,j= %d,th= %d\n",i,j,th);
-	*a = caller->work_u_cuda[index];
-	//printf("Hi %f \n", *a);
-	//return ret;
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
-{
-//	int j = threadIdx.x + threadIdx.y * blockDim.x;
-//	int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows
-//			 + (blockIdx.y) * this->n*this->n*this->gridCols
-//             + (blockIdx.x) * this->n
-//             + threadIdx.y * this->n*this->gridCols
-//             + threadIdx.x;
-
-	int index =  blockDim.x*blockIdx.x + threadIdx.x +
-			  (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x +
-			  (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y;
-
-	if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) )
-	{
-		caller->work_u_cuda[index] = *a;
-		caller->unusedCell_cuda[index] = 0;
-
-	}
-
-	*a = caller->work_u_cuda[index];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA3D( double u, const int i )
-{
-
-
-//	int j = threadIdx.x + threadIdx.y * blockDim.x;
-	//printf("j = %d, u = %f\n", j,u);
-
-//		int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows
-//				 + (blockIdx.y) * this->n*this->n*this->gridCols
-//	             + (blockIdx.x) * this->n
-//	             + threadIdx.y * this->n*this->gridCols
-//	             + threadIdx.x;
-
-		int index =  blockDim.x*blockIdx.x + threadIdx.x +
-				  (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x +
-				  (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y;
-
-		//printf("i= %d,j= %d,index= %d\n",i,j,index);
-		if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) )
-		{
-			this->work_u_cuda[index] = u;
-			this->unusedCell_cuda[index] = 0;
-
-		}
-
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID)
-{
-
-	__shared__ int tmp;
-	__shared__ double value;
-	//double tmpRes = 0.0;
-	volatile double* sharedTau = &u[blockDim.x*blockDim.y*blockDim.z];
-//	volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z];
-	int i = threadIdx.x;
-	int j = threadIdx.y;
-	int k = threadIdx.z;
-	int l = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z*blockDim.x*blockDim.y;
-	bool computeFU = !((i == 0 && (boundaryCondition & 4)) or
-			 (i == blockDim.x - 1 && (boundaryCondition & 2)) or
-			 (j == 0 && (boundaryCondition & 8)) or
-			 (j == blockDim.y - 1  && (boundaryCondition & 1))or
-			 (k == 0 && (boundaryCondition & 32)) or
-			 (k == blockDim.z - 1  && (boundaryCondition & 16)));
-
-	if(l == 0)
-	{
-		tmp = 0;
-		int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1) )*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1)
-				      + ((blockDim.z>>1)+blockDim.z*blockIdx.z)*blockDim.x*blockDim.y*gridDim.x*gridDim.y;
-		if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0)
-			tmp = 1;
-	}
-	__syncthreads();
-
-
-	__syncthreads();
-	if(tmp !=1)
-	{
-//		if(computeFU)
-//			absVal[l]=0.0;
-//		else
-//			absVal[l] = fabs(u[l]);
-//
-//		__syncthreads();
-//
-//	      if((blockDim.x == 16) && (l < 128))		absVal[l] = Max(absVal[l],absVal[l+128]);
-//	      __syncthreads();
-//	      if((blockDim.x == 16) && (l < 64))		absVal[l] = Max(absVal[l],absVal[l+64]);
-//	      __syncthreads();
-//	      if(l < 32)    							absVal[l] = Max(absVal[l],absVal[l+32]);
-//	      if(l < 16)								absVal[l] = Max(absVal[l],absVal[l+16]);
-//	      if(l < 8)									absVal[l] = Max(absVal[l],absVal[l+8]);
-//	      if(l < 4)									absVal[l] = Max(absVal[l],absVal[l+4]);
-//	      if(l < 2)									absVal[l] = Max(absVal[l],absVal[l+2]);
-//	      if(l < 1)									value   = sign(u[0])*Max(absVal[l],absVal[l+1]);
-//		__syncthreads();
-//
-//		if(computeFU)
-//			u[l] = value;
-		if(computeFU)
-		{
-			tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Ent(subMesh);
-			if(boundaryCondition == 4)
-			{
-				Ent.setCoordinates(Containers::StaticVector<3,int>(0,j,k));
-			   	Ent.refresh();
-				u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.x) ;//+  2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.x+this->n);
-			}
-			else if(boundaryCondition == 2)
-			{
-				Ent.setCoordinates(Containers::StaticVector<3,int>(blockDim.x - 1,j,k));
-			   	Ent.refresh();
-				u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.x - threadIdx.x - 1+this->n);
-			}
-			else if(boundaryCondition == 8)
-			{
-				Ent.setCoordinates(Containers::StaticVector<3,int>(i,0,k));
-			   	Ent.refresh();
-				u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(threadIdx.y) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.y+this->n);
-			}
-			else if(boundaryCondition == 1)
-			{
-				Ent.setCoordinates(Containers::StaticVector<3,int>(i,blockDim.y - 1,k));
-			   	Ent.refresh();
-				u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.y - threadIdx.y  - 1 +this->n);
-			}
-			else if(boundaryCondition == 32)
-			{
-				Ent.setCoordinates(Containers::StaticVector<3,int>(i,j,0));
-			   	Ent.refresh();
-				u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(threadIdx.z);
-			}
-			else if(boundaryCondition == 16)
-			{
-				Ent.setCoordinates(Containers::StaticVector<3,int>(i,j,blockDim.z - 1));
-			   	Ent.refresh();
-				u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(this->n - 1 - threadIdx.z) ;
-			}
-		}
-	}
-
-   double time = 0.0;
-   __shared__ double currentTau;
-   double cfl = this->cflCondition;
-   double fu = 0.0;
-//   if(threadIdx.x * threadIdx.y * threadIdx.z == 0)
-//   {
-//	   currentTau = this->tau0;
-//   }
-   double finalTime = this->stopTime;
-   __syncthreads();
-   if( boundaryCondition == 0 ) finalTime *= 2.0;
-
-   tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh);
-   tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
-   Entity.setCoordinates(Containers::StaticVector<3,int>(i,j,k));
-   Entity.refresh();
-   neighborEntities.refresh(subMesh,Entity.getIndex());
-
-
-   while( time < finalTime )
-   {
-	  sharedTau[l]=finalTime;
-
-	  if(computeFU)
-	  {
-		  fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighborEntities);
-		  if(abs(fu) > 0.0)
-			  sharedTau[l]=abs(cfl/fu);
-	  }
-
-      if(l == 0)
-      {
-    	  if(sharedTau[0] > 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >())	sharedTau[0] = 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >();
-      }
-      else if(l == blockDim.x*blockDim.y*blockDim.z - 1)
-      {
-    	  if( time + sharedTau[l] > finalTime )		sharedTau[l] = finalTime - time;
-      }
-
-      __syncthreads();
-      if(l < 256)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]);
-      __syncthreads();
-      if(l < 128)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]);
-      __syncthreads();
-      if(l < 64)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]);
-      __syncthreads();
-      if(l < 32)    							sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]);
-      __syncthreads();
-      if(l < 16)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]);
-      if(l < 8)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]);
-      if(l < 4)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]);
-      if(l < 2)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]);
-      if(l < 1)									currentTau   = Min(sharedTau[l],sharedTau[l+1]);
-      __syncthreads();
-
-//	if(abs(fu) < 10000.0)
-//		printf("bla");
-      if(computeFU)
-    	  u[l] += currentTau * fu;
-      time += currentTau;
-      __syncthreads();
-   }
-
-
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA3D(int i) const
-{
-	int j = i % (this->gridCols*this->gridRows*this->n*this->n);
-
-	return ( (i / (this->gridCols*this->gridRows*this->n*this->n))*this->gridCols*this->gridRows
-			+ (j / (this->gridCols*this->n*this->n))*this->gridCols
-			+ (j % (this->gridCols*this->n))/this->n);
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA3D( int i ) const
-{
-	return this->subgridValues_cuda[i];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA3D(int i, int value)
-{
-	this->subgridValues_cuda[i] = value;
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA3D( int i ) const
-{
-	return this->boundaryConditions_cuda[i];
-}
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__device__
-void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA3D(int i, int value)
-{
-	this->boundaryConditions_cuda[i] = value;
-}
-
-
-
-//north - 1, east - 2, west - 4, south - 8, up -16, down - 32
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now
-{
-
-	__shared__ int boundary[6]; // north,east,west,south
-	__shared__ int subgridValue;
-	__shared__ int newSubgridValue;
-
-
-	int gid =  blockDim.x*blockIdx.x + threadIdx.x +
-			  (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x +
-			  (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y;
-	double u = cudaSolver->work_u_cuda[gid];
-	double u_cmp;
-	int subgridValue_cmp=INT_MAX;
-	int boundary_index=0;
-
-
-	if(threadIdx.x+threadIdx.y+threadIdx.z == 0)
-	{
-		subgridValue = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y);
-		boundary[0] = 0;
-		boundary[1] = 0;
-		boundary[2] = 0;
-		boundary[3] = 0;
-		boundary[4] = 0;
-		boundary[5] = 0;
-		newSubgridValue = 0;
-//		printf("aaa z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x);
-	}
-	__syncthreads();
-
-
-
-	if(		(threadIdx.x == 0 				/*				&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.y == 0 				 	/*			&& (cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.z == 0 	 /*	&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.x == blockDim.x - 1 	 /*	&& !(cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.y == blockDim.y - 1 	 /*	&& (cudaSolver->currentStep & 1)*/) 		||
-			(threadIdx.z == blockDim.z - 1 	 /*	&& (cudaSolver->currentStep & 1)*/) 		)
-	{
-		if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - 1];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y - 1);
-			boundary_index = 2;
-		}
-
-		if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + 1];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y + 1);
-			boundary_index = 1;
-		}
-
-		__threadfence();
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-			u=u_cmp;
-		}
-		__threadfence();
-		if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y - 1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y);
-			boundary_index = 3;
-		}
-		if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y + 1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y);
-			boundary_index = 0;
-		}
-
-		__threadfence();
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-			u=u_cmp;
-		}
-		__threadfence();
-
-		if(threadIdx.z == 0 && (blockIdx.z != 0)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x*blockDim.y*gridDim.y];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z - 1)*gridDim.x*gridDim.y);
-			boundary_index = 5;
-		}
-		if(threadIdx.z == blockDim.z - 1 && (blockIdx.z != gridDim.z - 1)/* && (cudaSolver->currentStep & 1)*/)
-		{
-			u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x*blockDim.y*gridDim.y];
-			subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z + 1)*gridDim.x*gridDim.y);
-			boundary_index = 4;
-		}
-		__threadfence();
-
-		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
-		{
-			cudaSolver->unusedCell_cuda[gid] = 0;
-			atomicMax(&newSubgridValue, INT_MAX);
-			atomicMax(&boundary[boundary_index], 1);
-			cudaSolver->work_u_cuda[gid] = u_cmp;
-		}
-		__threadfence();
-
-	}
-	__syncthreads();
-
-	if(threadIdx.x+threadIdx.y+threadIdx.z == 0)
-	{
-
-		if(subgridValue == INT_MAX && newSubgridValue != 0)
-			cudaSolver->setSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y, -INT_MAX);
-
-		cudaSolver->setBoundaryConditionCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y, 	1  * boundary[0] +
-																													2  * boundary[1] +
-																													4  * boundary[2] +
-																													8  * boundary[3] +
-																													16 * boundary[4] +
-																													32 * boundary[5] );
-		if(blockIdx.x+blockIdx.y+blockIdx.z == 0)
-		{
-			cudaSolver->currentStep = cudaSolver->currentStep + 1;
-			*(cudaSolver->runcuda) = 0;
-		}
-	}
-}
-
-
-
-template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver)
-{
-	int stepValue = cudaSolver->currentStep + 4;
-	if( cudaSolver->getSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
-			cudaSolver->setSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x, stepValue);
-
-	atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x));
-}
-
-
-
-
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device>
-__global__
-void initCUDA3D( tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3)
-{
-
-
-	cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double));
-	cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
-	cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels*sizeof(int));
-	cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels*sizeof(int));
-	cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool));
-	*(cudaSolver->runcuda) = 1;
-	cudaSolver->currentStep = 1;
-	//cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost);
-	//ptr = cudaSolver->work_u_cuda;
-	printf("GPU memory allocated.\n");
-
-	for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels; i++)
-	{
-		cudaSolver->subgridValues_cuda[i] = INT_MAX;
-		cudaSolver->boundaryConditions_cuda[i] = 0;
-	}
-
-	/*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++)
-	{
-		printf("%d\n",j);
-		cudaSolver->unusedCell_cuda[ j] = 1;
-	}*/
-	printf("GPU memory initialized.\n");
-}
-
-
-
-
-//extern __shared__ double array[];
-template< typename SchemeHost, typename SchemeDevice, typename Device >
-__global__
-void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller)
-
-{
-
-
-	extern __shared__ double u[];
-
-	int i =  blockIdx.z *  gridDim.x *  gridDim.y +  blockIdx.y *  gridDim.x +  blockIdx.x;
-	int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
-
-	__shared__ int containsCurve;
-	if(l == 0)
-	{
-//		printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x);
-		containsCurve = 0;
-	}
-
-	caller->getSubgridCUDA3D(i,caller, &u[l]);
-	__syncthreads();
-	if(u[0] * u[l] <= 0.0)
-	{
-		atomicMax( &containsCurve, 1);
-	}
-
-	__syncthreads();
-	if(containsCurve == 1)
-	{
-		caller->runSubgridCUDA3D(0,u,i);
-		__syncthreads();
-//		caller->insertSubgridCUDA3D(u[l],i);
-		caller->updateSubgridCUDA3D(i,caller, &u[l]);
-
-		__syncthreads();
-		if(l == 0)
-			caller->setSubgridValueCUDA3D(i, 4);
-	}
-
-
-}
-
-
-
-
-
-template< typename SchemeHost, typename SchemeDevice, typename Device >
-__global__
-void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller)
-{
-	extern __shared__ double u[];
-	int i =  blockIdx.z *  gridDim.x *  gridDim.y +  blockIdx.y *  gridDim.x +  blockIdx.x;
-	int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
-	int bound = caller->getBoundaryConditionCUDA3D(i);
-
-	if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > 0)
-	{
-		caller->getSubgridCUDA3D(i,caller, &u[l]);
-
-		//if(l == 0)
-			//printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA3D(i));
-		if(caller->getSubgridValueCUDA3D(i) == caller->currentStep+4)
-		{
-			if(bound & 1)
-			{
-				caller->runSubgridCUDA3D(1,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 2 )
-			{
-				caller->runSubgridCUDA3D(2,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 4)
-			{
-				caller->runSubgridCUDA3D(4,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 8)
-			{
-				caller->runSubgridCUDA3D(8,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 16)
-			{
-				caller->runSubgridCUDA3D(16,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound & 32)
-			{
-				caller->runSubgridCUDA3D(32,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-
-		}
-		else
-		{
-			if( ((bound == 2)))
-			{
-				caller->runSubgridCUDA3D(2,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if( ((bound == 1) ))
-			{
-				caller->runSubgridCUDA3D(1,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if( ((bound == 8) ))
-			{
-				caller->runSubgridCUDA3D(8,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if((bound == 4))
-			{
-				caller->runSubgridCUDA3D(4,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound == 16)
-			{
-				caller->runSubgridCUDA3D(16,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-			if(bound == 32)
-			{
-				caller->runSubgridCUDA3D(32,u,i);
-				caller->updateSubgridCUDA3D(i,caller, &u[l]);
-				__syncthreads();
-			}
-		}
-																/*  1  2  4  8  16  32  */
-
-		if( ((bound & 19 )))									/*  1  1  0  0   1   0  */
-		{
-			caller->runSubgridCUDA3D(19,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if( ((bound & 21 )))									/*  1  0  1  0   1   0  */
-		{
-			caller->runSubgridCUDA3D(21,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if( ((bound & 26 )))									/*  0  1  0  1   1   0  */
-		{
-			caller->runSubgridCUDA3D(26,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if(   (bound & 28 ))									/*  0  0  1  1   1   0  */
-		{
-			caller->runSubgridCUDA3D(28,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-
-
-
-		if( ((bound & 35 )))									/*  1  0  1  0   0   1  */
-		{
-			caller->runSubgridCUDA3D(35,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if( ((bound & 37 )))									/*  1  0  1  0   0   1  */
-		{
-			caller->runSubgridCUDA3D(37,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if( ((bound & 42 )))									/*  0  1  0  1   0   1  */
-		{
-			caller->runSubgridCUDA3D(42,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-		if(   (bound & 44 ))									/*  0  0  1  1   0   1  */
-		{
-			caller->runSubgridCUDA3D(44,u,i);
-			caller->updateSubgridCUDA3D(i,caller, &u[l]);
-			__syncthreads();
-		}
-
-		if(l==0)
-		{
-			caller->setBoundaryConditionCUDA3D(i, 0);
-			caller->setSubgridValueCUDA3D(i, caller->getSubgridValueCUDA3D(i) - 1 );
-		}
-
-
-	}
-
-
-
-}
-
-#endif /*HAVE_CUDA*/
-
-#endif /* TNLPARALLELEIKONALSOLVER3D_IMPL_H_ */
diff --git a/src/TNL/Legacy/narrow-band/CMakeLists.txt b/src/TNL/Legacy/narrow-band/CMakeLists.txt
deleted file mode 100644
index 158cd20132ed4f499228cda7400ba18776dc1503..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/CMakeLists.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-set( tnl_narrow_band_SOURCES
-#     MainBuildConfig.h
-#     tnlNarrowBand2D_impl.h
-#     tnlNarrowBand.h
-#     narrowBandConfig.h 
-     main.cpp)
-
-
-IF(  BUILD_CUDA ) 
-	CUDA_ADD_EXECUTABLE(narrow-band main.cu)
-ELSE(  BUILD_CUDA )                
-	ADD_EXECUTABLE(narrow-band main.cpp)
-ENDIF( BUILD_CUDA )
-target_link_libraries (narrow-band tnl )
-
-
-INSTALL( TARGETS narrow-band
-         RUNTIME DESTINATION bin
-         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
-        
-#INSTALL( FILES ${tnl_narrow_band_SOURCES}
-#         DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/narrow-band )
diff --git a/src/TNL/Legacy/narrow-band/MainBuildConfig.h b/src/TNL/Legacy/narrow-band/MainBuildConfig.h
deleted file mode 100644
index ed3d686eb99379af1589d734eac9b5812cccdedf..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/MainBuildConfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/***************************************************************************
-                          MainBuildConfig.h  -  description
-                             -------------------
-    begin                : Jul 7, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef MAINBUILDCONFIG_H_
-#define MAINBUILDCONFIG_H_
-
-#include <solvers/tnlBuildConfigTags.h>
-
-class MainBuildConfig
-{
-   public:
-
-      static void print() {std::cerr << "MainBuildConfig" <<std::endl; }
-};
-
-/****
- * Turn off support for float and long double.
- */
-template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
-template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
-
-/****
- * Turn off support for short int and long int indexing.
- */
-template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
-template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
-
-/****
- * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types.
- */
-template< int Dimensions, typename Real, typename Device, typename Index >
-   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
-      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
-                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
-                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
-                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
-
-/****
- * Please, chose your preferred time discretisation  here.
- */
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
-template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
-
-/****
- * Only the Runge-Kutta-Merson solver is enabled by default.
- */
-template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
-
-#endif /* MAINBUILDCONFIG_H_ */
diff --git a/src/TNL/Legacy/narrow-band/main.cpp b/src/TNL/Legacy/narrow-band/main.cpp
deleted file mode 100644
index 8849008ff630db0400a6d7d98e789099e5fbb5d9..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/main.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/narrow-band/main.cu b/src/TNL/Legacy/narrow-band/main.cu
deleted file mode 100644
index 8849008ff630db0400a6d7d98e789099e5fbb5d9..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/main.cu
+++ /dev/null
@@ -1,17 +0,0 @@
-/***************************************************************************
-                          main.cpp  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#include "main.h"
diff --git a/src/TNL/Legacy/narrow-band/main.h b/src/TNL/Legacy/narrow-band/main.h
deleted file mode 100644
index 51dbdac37cfc5ff76b5ad03b826bf3a4642b17b4..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/main.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          main.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-
-#include "MainBuildConfig.h"
-	//for HOST versions:
-//#include "tnlNarrowBand.h"
-	//for DEVICE versions:
-#include "tnlNarrowBand_CUDA.h"
-#include "narrowBandConfig.h"
-#include <solvers/tnlBuildConfigTags.h>
-
-#include <mesh/tnlGrid.h>
-#include <core/tnlDevice.h>
-#include <time.h>
-#include <ctime>
-
-typedef MainBuildConfig BuildConfig;
-
-int main( int argc, char* argv[] )
-{
-	time_t start;
-	time_t stop;
-	time(&start);
-	std::clock_t start2= std::clock();
-   Config::ParameterContainer parameters;
-   tnlConfigDescription configDescription;
-   narrowBandConfig< BuildConfig >::configSetup( configDescription );
-
-   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
-      return false;
-
-   const int& dim = parameters.getParameter< int >( "dim" );
-
-   if(dim == 2)
-   {
-		tnlNarrowBand<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver;
-		if(!solver.init(parameters))
-	   {
-			cerr << "Solver failed to initialize." <<std::endl;
-			return EXIT_FAILURE;
-	   }
-		TNL_CHECK_CUDA_DEVICE;
-	  std::cout << "-------------------------------------------------------------" <<std::endl;
-	  std::cout << "Starting solver..." <<std::endl;
-	   solver.run();
-   }
-//   else if(dim == 3)
-//   {
-//		tnlNarrowBand<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver;
-//		if(!solver.init(parameters))
-//	   {
-//			cerr << "Solver failed to initialize." <<std::endl;
-//			return EXIT_FAILURE;
-//	   }
-//		TNL_CHECK_CUDA_DEVICE;
-//	  std::cout << "-------------------------------------------------------------" <<std::endl;
-//	  std::cout << "Starting solver..." <<std::endl;
-//	   solver.run();
-//   }
-   else
-   {
-	  std::cerr << "Unsupported number of dimensions: " << dim << "!" <<std::endl;
-	   return EXIT_FAILURE;
-   }
-
-
-   time(&stop);
-  std::cout << "Solver stopped..." <<std::endl;
-  std::cout <<std::endl;
-  std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl;
-   return EXIT_SUCCESS;
-}
-
-
diff --git a/src/TNL/Legacy/narrow-band/narrowBandConfig.h b/src/TNL/Legacy/narrow-band/narrowBandConfig.h
deleted file mode 100644
index bab58ceac46bf9c766b697ed79c2c676111323a2..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/narrowBandConfig.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/***************************************************************************
-                          narrowBandConfig.h  -  description
-                             -------------------
-    begin                : Oct 15, 2015
-    copyright            : (C) 2015 by Tomas Sobotik
-    email                :
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-
-#ifndef NARROWBANDCONFIG_H_
-#define NARROWBANDCONFIG_H_
-
-#include <config/tnlConfigDescription.h>
-
-template< typename ConfigTag >
-class narrowBandConfig
-{
-   public:
-      static void configSetup( tnlConfigDescription& config )
-      {
-         config.addDelimiter( "Narrow Band Solver solver settings:" );
-         config.addEntry        < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" );
-         config.addRequiredEntry        < String > ( "initial-condition", "Initial condition for solver");
-         config.addRequiredEntry        < int > ( "dim", "Dimension of problem.");
-         config.addRequiredEntry        < double > ( "tau", "Time step.");
-         config.addRequiredEntry        < double > ( "final-time", "Final time.");
-         config.addEntry       < String > ( "mesh", "Name of mesh.", "mesh.tnl" );
-         config.addEntry       < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" );
-      }
-};
-
-#endif /* NARROWBANDCONFIG_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand.h
deleted file mode 100644
index 7d3d19bc03b43247f735cf04c5c82368360a748d..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND_H_
-#define TNLNARROWBAND_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <functions/tnlMeshFunction.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-#include <ctime>
-#ifdef HAVE_OPENMP
-#include <omp.h>
-#endif
-
-
-
-
-template< typename Mesh,
-		  typename Real,
-		  typename Index >
-class tnlNarrowBand
-{};
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-
-	tnlNarrowBand();
-
-	static String getType();
-	bool init( const Config::ParameterContainer& parameters );
-
-	bool initGrid();
-	bool run();
-
-	//for single core version use this implementation:
-	void updateValue(const Index i, const Index j);
-	//for parallel version use this one instead:
-//	void updateValue(const Index i, const Index j, DofVectorType* grid);
-
-
-	void setupSquare1000(Index i, Index j);
-	void setupSquare1100(Index i, Index j);
-	void setupSquare1010(Index i, Index j);
-	void setupSquare1001(Index i, Index j);
-	void setupSquare1110(Index i, Index j);
-	void setupSquare1101(Index i, Index j);
-	void setupSquare1011(Index i, Index j);
-	void setupSquare1111(Index i, Index j);
-	void setupSquare0000(Index i, Index j);
-	void setupSquare0100(Index i, Index j);
-	void setupSquare0010(Index i, Index j);
-	void setupSquare0001(Index i, Index j);
-	void setupSquare0110(Index i, Index j);
-	void setupSquare0101(Index i, Index j);
-	void setupSquare0011(Index i, Index j);
-	void setupSquare0111(Index i, Index j);
-
-	Real fabsMin(const Real x, const Real y);
-
-
-protected:
-
-	MeshType Mesh;
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector, dofVector2;
-	DofVectorType data;
-
-	RealType h;
-
-	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
-
-
-#ifdef HAVE_OPENMP
-//	omp_lock_t* gridLock;
-#endif
-
-
-};
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-	tnlNarrowBand();
-
-	static String getType();
-	bool init( const Config::ParameterContainer& parameters );
-
-	bool initGrid();
-	bool run();
-
-	//for single core version use this implementation:
-	void updateValue(const Index i, const Index j, const Index k);
-	//for parallel version use this one instead:
-//	void updateValue(const Index i, const Index j, DofVectorType* grid);
-
-	Real fabsMin(const Real x, const Real y);
-
-
-protected:
-
-	MeshType Mesh;
-
-	bool exactInput;
-
-
-	tnlMeshFunction<MeshType> dofVector, dofVector2;
-	DofVectorType data;
-
-	RealType h;
-
-	tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity;
-
-#ifdef HAVE_OPENMP
-//	omp_lock_t* gridLock;
-#endif
-
-
-};
-
-
-	//for single core version use this implementation:
-#include "tnlNarrowBand2D_impl.h"
-	//for parallel version use this one instead:
-// #include "tnlNarrowBand2D_openMP_impl.h"
-
-#include "tnlNarrowBand3D_impl.h"
-
-#endif /* TNLNARROWBAND_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
deleted file mode 100644
index dff0b48c8d69cc6fa6ec932e24446a8c3b1b1417..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h
+++ /dev/null
@@ -1,1317 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand2D_CUDA_v4_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND2D_IMPL_H_
-#define TNLNARROWBAND2D_IMPL_H_
-
-#define NARROWBAND_SUBGRID_SIZE 32
-
-#include "tnlNarrowBand.h"
-
-#ifdef HAVE_CUDA
-__device__
-double fabsMin( double x, double y)
-{
-	double fx = abs(x);
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-}
-
-__device__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-#endif
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-#ifdef HAVE_CUDA
-   __device__ __host__
-#endif
-Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const
-{
-	if(arg > 0.0)
-		return arg;
-	return 0.0;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-#ifdef HAVE_CUDA
-   __device__ __host__
-#endif
-Real  tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const
-{
-	if(arg < 0.0)
-		return -arg;
-	return 0.0;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlNarrowBand< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
-:dofVector(Mesh)
-{
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	//Entity.refresh();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-	tau = parameters.getParameter< double >( "tau" );
-
-	finalTime = parameters.getParameter< double >( "final-time" );
-
-	statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaStatusVector),  statusGridSize*statusGridSize*sizeof(int));
-//	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(),  statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&reinitialize, sizeof(int));
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-#endif
-
-	int n = Mesh.getDimensions().x();
-
-	dim3 threadsPerBlock2(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE);
-	dim3 numBlocks2(statusGridSize ,statusGridSize);
-	initSetupGridCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	initSetupGrid2CUDA<<<numBlocks2,1>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-
-	/*dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);*/
-	initCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-
-	cout << "Solver initialized." <<std::endl;
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlockFS(1, 512);
-	dim3 numBlocksFS(4,1);
-	dim3 threadsPerBlockNB(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE);
-	dim3 numBlocksNB(n/NARROWBAND_SUBGRID_SIZE + 1,n/NARROWBAND_SUBGRID_SIZE + 1);
-
-	double time = 0.0;
-	int reinit = 0;
-
-	cout << "Hi!" <<std::endl;
-	runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	cout << "Hi2!" <<std::endl;
-	while(time < finalTime)
-	{
-		if(tau+time > finalTime)
-			tau=finalTime-time;
-
-		runNarrowBandCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver,tau);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		time += tau;
-
-
-		cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		if(reinit != 0 /*&& time != finalTime */)
-		{
-			cout << time <<std::endl;
-
-			initSetupGridCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			initSetupGrid2CUDA<<<numBlocksNB,1>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			initCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-		}
-	}
-
-	//data.setLike(dofVector.getData());
-	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	//data.save("u-00001.tnl");
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
-	//								8 - to the east of curve, 	16 - to the west of curve.
-	int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize;
-	if(cudaStatusVector[subgridID] != 0 && i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y())
-	{
-		tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-		Entity.setCoordinates(CoordinatesType(i,j));
-		Entity.refresh();
-		tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-		Real value = cudaDofVector2[Entity.getIndex()];
-		Real a,b, tmp;
-
-		if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9))*/ )
-			a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-		else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17))*/ )
-			a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-		else
-		{
-			a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-					 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-		}
-
-		if( j == 0 /*|| (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3))*/ )
-			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-		else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5)) */)
-			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		else
-		{
-			b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-					 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-		}
-
-
-		if(abs(a-b) >= h)
-			tmp = fabsMin(a,b) + sign(value)*h;
-		else
-			tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-	//	cudaDofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
-		atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp);
-	}
-
-}
-
-
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-
-
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-
-	if(solver->Mesh.getDimensions().x() > gx  && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int i = threadIdx.x + blockDim.x*blockIdx.x;
-	int j = blockDim.y*blockIdx.y + threadIdx.y;
-
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	int gid = Entity.getIndex();
-
-	if(abs(cudaDofVector2[gid]) > 1.5*h)
-		cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector2[gid]);
-
-//	if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y())
-//	{
-//		if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 )
-//		{
-//			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-//			cudaDofVector2[gid+1] = sign(cudaDofVector2[gid+1])*0.5*h;
-//		}
-//		if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 )
-//		{
-//			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-//			cudaDofVector2[gid+Mesh.getDimensions().x()] = sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h;
-//		}
-//
-//		if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 )
-//		{
-//			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-//			cudaDofVector2[gid-1] = sign(cudaDofVector2[gid-1])*0.5*h;
-//		}
-//		if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 )
-//		{
-//			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-//			cudaDofVector2[gid-Mesh.getDimensions().x()] = sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h;
-//		}
-//	}
-
-
-//
-
-
-
-
-
-
-//	if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() )
-//	{
-//		if(cudaDofVector[Entity.getIndex()] > 0)
-//		{
-//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1111(i,j);
-//					else
-//						setupSquare1110(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1101(i,j);
-//					else
-//						setupSquare1100(i,j);
-//				}
-//			}
-//			else
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1011(i,j);
-//					else
-//						setupSquare1010(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1001(i,j);
-//					else
-//						setupSquare1000(i,j);
-//				}
-//			}
-//		}
-//		else
-//		{
-//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0111(i,j);
-//					else
-//						setupSquare0110(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0101(i,j);
-//					else
-//						setupSquare0100(i,j);
-//				}
-//			}
-//			else
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0011(i,j);
-//					else
-//						setupSquare0010(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0001(i,j);
-//					else
-//						setupSquare0000(i,j);
-//				}
-//			}
-//		}
-//
-//	}
-
-	return true;
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	//Real fy = abs(y);
-
-	//Real tmpMin = Min(fx,abs(y));
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-
-	int gx = 0;
-	int gy = threadIdx.y;
-	//if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
-	//	return;
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
-	//int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
-
-	//int id1 = gx+gy;
-	//int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
-
-	if(blockIdx.x==0)
-	{
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==1)
-	{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==2)
-	{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==3)
-	{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-
-}
-
-
-
-
-__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	__shared__ double u0;
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-
-//		printf("Hello from  block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y);
-		if(threadIdx.x+threadIdx.y == 0)
-		{
-//			printf("Hello from  block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y);
-
-			if(blockIdx.x+blockIdx.y == 0)
-				*(solver->reinitialize) = 0;
-
-			solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] = 0;
-
-			u0 = solver->cudaDofVector2[(blockDim.y*blockIdx.y + 0)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + 0];
-		}
-		__syncthreads();
-
-		double u = solver->cudaDofVector2[(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x];
-
-		if(u*u0 <=0.0)
-			atomicMax(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y]),1);
-	}
-//	if(threadIdx.x+threadIdx.y == 0)
-
-//	printf("Bye from  block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y);
-
-
-}
-
-
-
-// run this with one thread per block
-__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-//	printf("Hello\n");
-	if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1)
-	{
-//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
-//								8 - to the east of curve, 	16 - to the west of curve.
-			if(blockIdx.x > 0)
-			{
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16);
-			}
-
-			if(blockIdx.x < gridDim.x - 1)
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8);
-
-			if(blockIdx.y > 0 )
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y - 1)]), 4);
-
-			if(blockIdx.y < gridDim.y - 1)
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y + 1)]), 2);
-	}
-
-
-}
-
-
-
-
-
-__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau)
-{
-	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x;
-	int i = threadIdx.x + blockIdx.x*blockDim.x;
-	int j = threadIdx.y + blockIdx.y*blockDim.y;
-
-//	if(i+j == 0)
-//		printf("Hello\n");
-
-	int blockID = blockIdx.x + blockIdx.y*gridDim.x; /*i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);*/
-
-	int status = solver->cudaStatusVector[blockID];
-
-	if(solver->Mesh.getDimensions().x() > i && solver->Mesh.getDimensions().y() > j)
-	{
-
-		if(status != 0)
-		{
-			tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
-			Entity.setCoordinates(Containers::StaticVector<2,double>(i,j));
-			Entity.refresh();
-			tnlNeighborGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-			double value = solver->cudaDofVector2[Entity.getIndex()];
-			double xf,xb,yf,yb, grad, fu, a,b;
-			a = b = 0.0;
-
-			if( i == 0 || (threadIdx.x == 0 && !(status & 9)) )
-			{
-				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
-			}
-			else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) )
-			{
-				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()] - value;
-			}
-			else
-			{
-				xb =  value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
-			}
-
-			if( j == 0 || (threadIdx.y == 0 && !(status & 3)) )
-			{
-				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] ;
-				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
-			}
-			else if( j == solver->Mesh.getDimensions().y() - 1  || (threadIdx.y == blockDim.y - 1 && !(status & 5)) )
-			{
-				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()] - value;
-			}
-			else
-			{
-				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()];
-				yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
-			}
-			__syncthreads();
-
-
-
-
-
-			   if(sign(value) >= 0.0)
-			   {
-				   xf = solver->negativePart(xf);
-
-				   xb = solver->positivePart(xb);
-
-				   yf = solver->negativePart(yf);
-
-				   yb = solver->positivePart(yb);
-
-			   }
-			   else
-			   {
-
-				   xb = solver->negativePart(xb);
-
-				   xf = solver->positivePart(xf);
-
-				   yb = solver->negativePart(yb);
-
-				   yf = solver->positivePart(yf);
-			   }
-
-
-			   if(xb > xf)
-				   a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
-			   else
-				   a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
-
-			   if(yb > yf)
-				   b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();
-			   else
-				   b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();
-
-
-
-//			grad = sqrt(0.5 * (xf*xf + xb*xb    +   yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
-
-			grad = sqrt(/*0.5 **/ (a*a    +   b*b ) );
-
-			fu = -1.0 * grad;
-
-			if((tau*fu+value)*value <=0 )
-			{
-				//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
-				//								8 - to the east of curve, 	16 - to the west of curve.
-
-				if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) )
-					atomicMax(solver->reinitialize,1);
-				else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) )
-					atomicMax(solver->reinitialize,1);
-				else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) )
-					atomicMax(solver->reinitialize,1);
-				else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) )
-					atomicMax(solver->reinitialize,1);
-			}
-
-			solver->cudaDofVector2[Entity.getIndex()]  += tau*fu;
-		}
-	}
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-}
-#endif
-
-
-
-
-#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
deleted file mode 100644
index c92810490069164d9f170249c328304576773d20..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h
+++ /dev/null
@@ -1,1313 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand2D_CUDA_v4_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND2D_IMPL_H_
-#define TNLNARROWBAND2D_IMPL_H_
-
-#define NARROWBAND_SUBGRID_SIZE 32
-
-#include "tnlNarrowBand.h"
-
-__device__
-double fabsMin( double x, double y)
-{
-	double fx = abs(x);
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-}
-
-__device__
-double atomicFabsMin(double* address, double val)
-{
-	unsigned long long int* address_as_ull =
-						  (unsigned long long int*)address;
-	unsigned long long int old = *address_as_ull, assumed;
-	do {
-		assumed = old;
-			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
-	} while (assumed != old);
-	return __longlong_as_double(old);
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-#ifdef HAVE_CUDA
-   __device__ __host__
-#endif
-Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const
-{
-	if(arg > 0.0)
-		return arg;
-	return 0.0;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-#ifdef HAVE_CUDA
-   __device__ __host__
-#endif
-Real  tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const
-{
-	if(arg < 0.0)
-		return -arg;
-	return 0.0;
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlNarrowBand< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
-:dofVector(Mesh)
-{
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	//Entity.refresh();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-	tau = parameters.getParameter< double >( "tau" );
-
-	finalTime = parameters.getParameter< double >( "final-time" );
-
-	statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaStatusVector),  statusGridSize*statusGridSize*sizeof(int));
-//	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(),  statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&reinitialize, sizeof(int));
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-#endif
-
-	int n = Mesh.getDimensions().x();
-
-	dim3 threadsPerBlock2(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE);
-	dim3 numBlocks2(statusGridSize ,statusGridSize);
-	initSetupGridCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	initSetupGrid2CUDA<<<numBlocks2,1>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-
-	/*dim3 threadsPerBlock(16, 16);
-	dim3 numBlocks(n/16 + 1 ,n/16 +1);*/
-	initCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-
-	cout << "Solver initialized." <<std::endl;
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlockFS(1, 512);
-	dim3 numBlocksFS(4,1);
-	dim3 threadsPerBlockNB(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE);
-	dim3 numBlocksNB(n/NARROWBAND_SUBGRID_SIZE + 1,n/NARROWBAND_SUBGRID_SIZE + 1);
-
-	double time = 0.0;
-	int reinit = 0;
-
-	cout << "Hi!" <<std::endl;
-	runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	cout << "Hi2!" <<std::endl;
-	while(time < finalTime)
-	{
-		if(tau+time > finalTime)
-			tau=finalTime-time;
-
-		runNarrowBandCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver,tau);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-
-		time += tau;
-
-
-		cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
-		TNL_CHECK_CUDA_DEVICE;
-		if(reinit != 0 /*&& time != finalTime */)
-		{
-			cout << time <<std::endl;
-
-			initSetupGridCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			initSetupGrid2CUDA<<<numBlocksNB,1>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			initCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-			runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0);
-			cudaDeviceSynchronize();
-			TNL_CHECK_CUDA_DEVICE;
-		}
-	}
-
-	//data.setLike(dofVector.getData());
-	//cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	//data.save("u-00001.tnl");
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-	//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
-	//								8 - to the east of curve, 	16 - to the west of curve.
-	int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);
-	if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && Mesh.getDimensions().y())
-	{
-		tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-		Entity.setCoordinates(CoordinatesType(i,j));
-		Entity.refresh();
-		tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-		Real value = cudaDofVector2[Entity.getIndex()];
-		Real a,b, tmp;
-
-		if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9)) */)
-			a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-		else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17)) */)
-			a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-		else
-		{
-			a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-					 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-		}
-
-		if( j == 0/* || (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3)) */)
-			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-		else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5))*/ )
-			b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-		else
-		{
-			b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-					 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-		}
-
-
-		if(abs(a-b) >= h)
-			tmp = fabsMin(a,b) + sign(value)*h;
-		else
-			tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-	//	cudaDofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
-		atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp);
-	}
-
-}
-
-
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-
-
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-
-	if(solver->Mesh.getDimensions().x() > gx  && solver->Mesh.getDimensions().y() > gy)
-	{
-		solver->initGrid();
-	}
-
-
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	int i = threadIdx.x + blockDim.x*blockIdx.x;
-	int j = blockDim.y*blockIdx.y + threadIdx.y;
-
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	int gid = Entity.getIndex();
-
-	cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector2[gid]);
-
-	if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y())
-	{
-		if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 )
-		{
-			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-			cudaDofVector2[gid+1] = sign(cudaDofVector2[gid+1])*0.5*h;
-		}
-		if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 )
-		{
-			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-			cudaDofVector2[gid+Mesh.getDimensions().x()] = sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h;
-		}
-
-		if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 )
-		{
-			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-			cudaDofVector2[gid-1] = sign(cudaDofVector2[gid-1])*0.5*h;
-		}
-		if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 )
-		{
-			cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h;
-			cudaDofVector2[gid-Mesh.getDimensions().x()] = sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h;
-		}
-	}
-
-
-//
-
-
-
-
-
-
-//	if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() )
-//	{
-//		if(cudaDofVector[Entity.getIndex()] > 0)
-//		{
-//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1111(i,j);
-//					else
-//						setupSquare1110(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1101(i,j);
-//					else
-//						setupSquare1100(i,j);
-//				}
-//			}
-//			else
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1011(i,j);
-//					else
-//						setupSquare1010(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare1001(i,j);
-//					else
-//						setupSquare1000(i,j);
-//				}
-//			}
-//		}
-//		else
-//		{
-//			if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0111(i,j);
-//					else
-//						setupSquare0110(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0101(i,j);
-//					else
-//						setupSquare0100(i,j);
-//				}
-//			}
-//			else
-//			{
-//				if(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0011(i,j);
-//					else
-//						setupSquare0010(i,j);
-//				}
-//				else
-//				{
-//					if(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-//						setupSquare0001(i,j);
-//					else
-//						setupSquare0000(i,j);
-//				}
-//			}
-//		}
-//
-//	}
-
-	return true;
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	//Real fy = abs(y);
-
-	//Real tmpMin = Min(fx,abs(y));
-
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-
-	int gx = 0;
-	int gy = threadIdx.y;
-	//if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy)
-	//	return;
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-	//int gid = solver->Mesh.getDimensions().x() * gy + gx;
-	//int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x();
-
-	//int id1 = gx+gy;
-	//int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy;
-
-	if(blockIdx.x==0)
-	{
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==1)
-	{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==2)
-	{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-	else if(blockIdx.x==3)
-	{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-	}
-
-}
-
-
-
-
-__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	__shared__ double u0;
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy)
-	{
-
-//		printf("Hello from  block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y);
-		if(threadIdx.x+threadIdx.y == 0)
-		{
-//			printf("Hello from  block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y);
-
-			if(blockIdx.x+blockIdx.y == 0)
-				*(solver->reinitialize) = 0;
-
-			solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] = 0;
-
-			u0 = solver->cudaDofVector2[(blockDim.y*blockIdx.y + 0)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + 0];
-		}
-		__syncthreads();
-
-		double u = solver->cudaDofVector2[(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x];
-
-		if(u*u0 <=0.0)
-			atomicMax(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y]),1);
-	}
-//	if(threadIdx.x+threadIdx.y == 0)
-
-//	printf("Bye from  block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y);
-
-
-}
-
-
-
-// run this with one thread per block
-__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-//	printf("Hello\n");
-	if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1)
-	{
-//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
-//								8 - to the east of curve, 	16 - to the west of curve.
-			if(blockIdx.x > 0)
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16);
-
-			if(blockIdx.x < gridDim.x - 1)
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8);
-
-			if(blockIdx.y > 0 )
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y - 1)]), 4);
-
-			if(blockIdx.y < gridDim.y - 1)
-				atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y + 1)]), 2);
-	}
-
-
-}
-
-
-
-
-
-__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau)
-{
-	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x;
-	int i = threadIdx.x + blockIdx.x*blockDim.x;
-	int j = threadIdx.y + blockIdx.y*blockDim.y;
-
-//	if(i+j == 0)
-//		printf("Hello\n");
-
-	int blockID = blockIdx.x + blockIdx.y*gridDim.x; /*i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);*/
-
-	int status = solver->cudaStatusVector[blockID];
-
-	if(solver->Mesh.getDimensions().x() > i && solver->Mesh.getDimensions().y() > j)
-	{
-
-//		if(status != 0)
-		{
-			tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh);
-			Entity.setCoordinates(Containers::StaticVector<2,double>(i,j));
-			Entity.refresh();
-			tnlNeighborGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-			double value = solver->cudaDofVector2[Entity.getIndex()];
-			double xf,xb,yf,yb, grad, fu, a,b;
-			a = b = 0.0;
-
-			if( i == 0 /*|| (threadIdx.x == 0 && !(status & 9)) */)
-			{
-				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
-			}
-			else if( i == solver->Mesh.getDimensions().x() - 1 /*|| (threadIdx.x == blockDim.x - 1 && !(status & 17)) */)
-			{
-				xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()] - value;
-			}
-			else
-			{
-				xb =  value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-				xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] - value;
-			}
-
-			if( j == 0/* || (threadIdx.y == 0 && !(status & 3))*/ )
-			{
-				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] ;
-				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
-			}
-			else if( j == solver->Mesh.getDimensions().y() - 1  /*|| (threadIdx.y == blockDim.y - 1 && !(status & 5)) */)
-			{
-				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-				yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()] - value;
-			}
-			else
-			{
-				yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()];
-				yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] - value;
-			}
-			__syncthreads();
-
-
-
-
-
-			   if(sign(value) > 0.0)
-			   {
-				   xf = solver->negativePart(xf);
-
-				   xb = solver->positivePart(xb);
-
-				   yf = solver->negativePart(yf);
-
-				   yb = solver->positivePart(yb);
-
-			   }
-			   else
-			   {
-
-				   xb = solver->negativePart(xb);
-
-				   xf = solver->positivePart(xf);
-
-				   yb = solver->negativePart(yb);
-
-				   yf = solver->positivePart(yf);
-			   }
-
-
-			   if(xb > xf)
-				   a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
-			   else
-				   a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
-
-			   if(yb > yf)
-				   b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();
-			   else
-				   b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >();
-
-
-
-//			grad = sqrt(0.5 * (xf*xf + xb*xb    +   yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >();
-
-			grad = sqrt(/*0.5 **/ (a*a    +   b*b ) );
-
-			fu = -1.0 * grad;
-
-//			if((tau*fu+value)*value <=0 )
-//			{
-//				//			1 - with curve,  	2 - to the north of curve, 	4  - to the south of curve,
-//				//								8 - to the east of curve, 	16 - to the west of curve.
-//
-//				if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) )
-//					atomicMax(solver->reinitialize,1);
-//				else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) )
-//					atomicMax(solver->reinitialize,1);
-//				else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) )
-//					atomicMax(solver->reinitialize,1);
-//				else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) )
-//					atomicMax(solver->reinitialize,1);
-//			}
-
-			solver->cudaDofVector2[Entity.getIndex()]  += tau*fu;
-		}
-	}
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[Entity.getIndex()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	Real al,be, a,b,c,s;
-	al=abs(cudaDofVector[Entity.getIndex()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 cudaDofVector[Entity.getIndex()]));
-
-	be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1,  0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-}
-#endif
-
-
-
-
-#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h
deleted file mode 100644
index d42bc2a7610d5bb02a94031d446a61ac5a2e6579..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h
+++ /dev/null
@@ -1,927 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand2D_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND2D_IMPL_H_
-#define TNLNARROWBAND2D_IMPL_H_
-
-#include "tnlNarrowBand.h"
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlNarrowBand< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
-:Entity(Mesh),
- dofVector(Mesh),
- dofVector2(Mesh)
-{
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-	dofVector2.load(initialCondition);
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0 >();
-	Entity.refresh();
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-	cout << "a" <<std::endl;
-	return initGrid();
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++)
-	{
-		dofVector2[i]=INT_MAX*sign(dofVector[i]);
-	}
-
-	for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++)
-	{
-		for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++)
-			{
-			this->Entity.setCoordinates(CoordinatesType(i,j));
-			this->Entity.refresh();
-			neighborEntities.refresh(Mesh,Entity.getIndex());
-
-				if(dofVector[this->Entity.getIndex()] > 0)
-				{
-					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1111(i,j);
-							else
-								setupSquare1110(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1101(i,j);
-							else
-								setupSquare1100(i,j);
-						}
-					}
-					else
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1011(i,j);
-							else
-								setupSquare1010(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare1001(i,j);
-							else
-								setupSquare1000(i,j);
-						}
-					}
-				}
-				else
-				{
-					if(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()] > 0)
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0111(i,j);
-							else
-								setupSquare0110(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0101(i,j);
-							else
-								setupSquare0100(i,j);
-						}
-					}
-					else
-					{
-						if(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()] > 0)
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0011(i,j);
-							else
-								setupSquare0010(i,j);
-						}
-						else
-						{
-							if(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()] > 0)
-								setupSquare0001(i,j);
-							else
-								setupSquare0000(i,j);
-						}
-					}
-				}
-
-			}
-	}
-	cout << "a" <<std::endl;
-
-//	Real tmp = 0.0;
-//	Real ax=0.5/sqrt(2.0);
-//
-//	if(!exactInput)
-//	{
-//		for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++)
-//				dofVector[i]=0.5*h*sign(dofVector[i]);
-//	}
-//
-//
-//	for(Index i = 1; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		for(Index j = 1; j < Mesh.getDimensions().y()-1; j++)
-//		{
-//			 tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//			if(tmp == 0.0)
-//			{}
-//			else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//					dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-//					dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//			{}
-//			else
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//		}
-//	}
-//
-//
-//
-//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		Index j = 0;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//	for(int i = 1; i < Mesh.getDimensions().x()-1; i++)
-//	{
-//		Index j = Mesh.getDimensions().y() - 1;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
-//	{
-//		Index i = 0;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//	for(int j = 1; j < Mesh.getDimensions().y()-1; j++)
-//	{
-//		Index i = Mesh.getDimensions().x() - 1;
-//		tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//
-//
-//		if(tmp == 0.0)
-//		{}
-//		else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ||
-//				dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 )
-//		{}
-//		else
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//	}
-//
-//
-//	Index i = Mesh.getDimensions().x() - 1;
-//	Index j = Mesh.getDimensions().y() - 1;
-//
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//
-//
-//
-//	j = 0;
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//
-//
-//
-//	i = 0;
-//	j = Mesh.getDimensions().y() -1;
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-//
-//
-//
-//	j = 0;
-//	tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]);
-//	if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 &&
-//			dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0)
-//
-//		dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX;
-
-	//data.setLike(dofVector2.getData());
-	//data=dofVector2.getData();
-	//cout << data.getType() <<std::endl;
-	dofVector2.save("u-00000.tnl");
-	//dofVector2.getData().save("u-00000.tnl");
-
-	return true;
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-	{
-		for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-	for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-	{
-		for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-		{
-			updateValue(i,j);
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-//	data.setLike(dofVector2.getData());
-//	data = dofVector2.getData();
-//	cout << data.getType() <<std::endl;
-	dofVector2.save("u-00001.tnl");
-	//dofVector2.getData().save("u-00001.tnl");
-
-	return true;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j)
-{
-
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity);
-
-	Real value = dofVector2[Entity.getIndex()];
-	Real a,b, tmp;
-
-	if( i == 0 )
-		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()];
-	else
-	{
-		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0 >()],
-				 dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()] );
-	}
-
-	if( j == 0 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()];
-	else
-	{
-		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1 >()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()] );
-	}
-
-
-	if(fabs(a-b) >= h)
-		tmp = fabsMin(a,b) + sign(value)*h;
-	else
-		tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) );
-
-
-	dofVector2[Entity.getIndex()] = fabsMin(value, tmp);
-
-//	if(dofVector2[Entity.getIndex()] > 1.0)
-//		cout << value << "    " << tmp << " " << dofVector2[Entity.getIndex()] <<std::endl;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = fabs(x);
-	Real fy = fabs(y);
-
-	Real tmpMin = Min(fx,fy);
-
-	if(tmpMin == fx)
-		return x;
-	else
-		return y;
-
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-{
-//	this->Entity.setCoordinates(CoordinatesType(i,j));
-//	this->Entity.refresh();
-//	auto neighborEntities =  Entity.getNeighborEntities();
-//	dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-{
-//	this->Entity.setCoordinates(CoordinatesType(i,j));
-//	this->Entity.refresh();
-//	auto neighborEntities =  Entity.getNeighborEntities();
-//	dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]);
-//	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-//	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[Entity.getIndex()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[Entity.getIndex()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[Entity.getIndex()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[Entity.getIndex()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	a = be/al;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-al;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	Real al,be, a,b,c,s;
-	al=abs(dofVector[Entity.getIndex()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()]-
-			 dofVector[Entity.getIndex()]));
-
-	be=abs(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]/
-			(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()]-
-			 dofVector[neighborEntities.template getEntityIndex< 0,  1 >()]));
-
-	a = al-be;
-	b=1.0;
-	c=-be;
-	s= h/sqrt(a*a+b*b);
-
-
-	dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j));
-	this->Entity.refresh();
-	auto neighborEntities =  Entity.getNeighborEntities();
-	dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]);
-	dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 0,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  1 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  1 >()]);
-	dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1,  0 >()],dofVector2[neighborEntities.template getEntityIndex< 1,  0 >()]);
-}
-
-
-
-
-#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h
deleted file mode 100644
index d362f249a79112aa9b902f86cd1a304702292423..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h
+++ /dev/null
@@ -1,961 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand2D_CUDA_v4_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND3D_IMPL_H_
-#define TNLNARROWBAND3D_IMPL_H_
-
-#include "tnlNarrowBand.h"
-
-//__device__
-//double fabsMin( double x, double y)
-//{
-//	double fx = abs(x);
-//
-//	if(Min(fx,abs(y)) == fx)
-//		return x;
-//	else
-//		return y;
-//}
-//
-//__device__
-//double atomicFabsMin(double* address, double val)
-//{
-//	unsigned long long int* address_as_ull =
-//						  (unsigned long long int*)address;
-//	unsigned long long int old = *address_as_ull, assumed;
-//	do {
-//		assumed = old;
-//			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) ));
-//	} while (assumed != old);
-//	return __longlong_as_double(old);
-//}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlNarrowBand< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-	this->h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >();
-	counter = 0;
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-
-
-#ifdef HAVE_CUDA
-
-	cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-	cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double));
-	cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice);
-
-
-	cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >));
-	cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice);
-
-#endif
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(8, 8,8);
-	dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1);
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-	initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	return true;
-}
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	int n = Mesh.getDimensions().x();
-	dim3 threadsPerBlock(1, 512);
-	dim3 numBlocks(8,1);
-
-
-	runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0);
-
-	cudaDeviceSynchronize();
-	TNL_CHECK_CUDA_DEVICE;
-
-	cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost);
-	cudaDeviceSynchronize();
-	cudaFree(cudaDofVector);
-	cudaFree(cudaDofVector2);
-	cudaFree(cudaSolver);
-	dofVector.save("u-00001.tnl");
-	cudaDeviceSynchronize();
-	return true;
-}
-
-
-
-
-#ifdef HAVE_CUDA
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
-{
-	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j,k));
-	Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
-	Real value = cudaDofVector2[Entity.getIndex()];
-	Real a,b,c, tmp;
-
-	if( i == 0 )
-		a = cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0,  0 >()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-	else
-	{
-		a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()],
-				 cudaDofVector2[neighborEntities.template getEntityIndex< 1,  0,  0 >()] );
-	}
-
-	if( j == 0 )
-		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1,  0 >()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0 >()];
-	else
-	{
-		b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0 >()],
-				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  1,  0 >()] );
-	}
-
-	if( k == 0 )
-		c = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  1 >()];
-	else if( k == Mesh.getDimensions().z() - 1 )
-		c = cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1 >()];
-	else
-	{
-		c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1 >()],
-				 cudaDofVector2[neighborEntities.template getEntityIndex< 0,  0,  1 >()] );
-	}
-
-	Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c);
-
-	if(hD < 0.0)
-		tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h;
-	else
-		tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) );
-
-	atomicFabsMin(&cudaDofVector2[Entity.getIndex()],tmp);
-
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k)
-{
-	tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh);
-	Entity.setCoordinates(CoordinatesType(i,j,k));
-	Entity.refresh();
-	int gid = Entity.getIndex();
-
-	if(abs(cudaDofVector[gid]) < 1.8*h)
-		cudaDofVector2[gid] = cudaDofVector[gid];
-	else
-		cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]);
-
-	return true;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-__device__
-Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = abs(x);
-	if(Min(fx,abs(y)) == fx)
-		return x;
-	else
-		return y;
-
-
-}
-
-
-
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i)
-{
-
-	int gx = 0;
-	int gy = threadIdx.y;
-
-	int n = solver->Mesh.getDimensions().x();
-	int blockCount = n/blockDim.y +1;
-
-	if(blockIdx.x==0)
-	{
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx = 0;
-		gy = threadIdx.y;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		__syncthreads();
-		}
-	}
-	else if(blockIdx.x==1)
-	{
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==2)
-	{
-
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==3)
-	{
-		for(int gz = 0; gz < n;gz++)
-		{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-
-
-
-
-	else if(blockIdx.x==4)
-	{
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx = 0;
-		gy = threadIdx.y;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==5)
-	{
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx=n-1;
-		gy=threadIdx.y;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy < n)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy+=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==6)
-	{
-
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx=0;
-		gy=n-threadIdx.y-1;
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx++;
-				if(gx==n)
-				{
-					gx=0;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-	else if(blockIdx.x==7)
-	{
-		for(int gz = n-1; gz > -1;gz--)
-		{
-		gx=n-1;
-		gy=n-threadIdx.y-1;
-
-		for(int k = 0; k < n*blockCount + blockDim.y; k++)
-		{
-			if(threadIdx.y  < k+1 && gy > -1)
-			{
-				solver->updateValue(gx,gy,gz);
-				gx--;
-				if(gx==-1)
-				{
-					gx=n-1;
-					gy-=blockDim.y;
-				}
-			}
-
-
-			__syncthreads();
-		}
-		}
-	}
-
-
-
-
-}
-
-
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver)
-{
-	int gx = threadIdx.x + blockDim.x*blockIdx.x;
-	int gy = blockDim.y*blockIdx.y + threadIdx.y;
-	int gz = blockDim.z*blockIdx.z + threadIdx.z;
-
-	if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz)
-	{
-		solver->initGrid(gx,gy,gz);
-	}
-
-
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	a = be/al;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-al;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//
-//
-//
-//
-//
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-al;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	Real al,be, a,b,c,s;
-//	al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]));
-//
-//	be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/
-//			(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]-
-//			 cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]));
-//
-//	a = al-be;
-//	b=1.0;
-//	c=-be;
-//	s= h/sqrt(a*a+b*b);
-//
-//
-//	cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//
-//}
-//
-//template< typename MeshReal,
-//          typename Device,
-//          typename MeshIndex,
-//          typename Real,
-//          typename Index >
-//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j)
-//{
-//	Index index = Mesh.getCellIndex(CoordinatesType(i,j));
-//	cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]);
-//	cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]);
-//}
-#endif
-
-
-
-
-#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h
deleted file mode 100644
index 6e63d527b92e0b5c7907a5a2e8b24cba7ed432f2..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand2D_impl.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND3D_IMPL_H_
-#define TNLNARROWBAND3D_IMPL_H_
-
-#include "tnlNarrowBand.h"
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-String tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType()
-{
-	   return String( "tnlNarrowBand< " ) +
-	          MeshType::getType() + ", " +
-	          ::getType< Real >() + ", " +
-	          ::getType< Index >() + " >";
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand()
-:Entity(Mesh),
- dofVector(Mesh),
- dofVector2(Mesh)
-{
-}
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters )
-{
-	const String& meshFile = parameters.getParameter< String >( "mesh" );
-
-	if( ! Mesh.load( meshFile ) )
-	{
-		  std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-
-
-	const String& initialCondition = parameters.getParameter <String>("initial-condition");
-	if( ! dofVector.load( initialCondition ) )
-	{
-		  std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl;
-		   return false;
-	}
-	dofVector2.load(initialCondition);
-
-	h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >();
-	Entity.refresh();
-
-	const String& exact_input = parameters.getParameter< String >( "exact-input" );
-
-	if(exact_input == "no")
-		exactInput=false;
-	else
-		exactInput=true;
-//	cout << "bla "<<endl;
-	return initGrid();
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid()
-{
-	for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++)
-	{
-
-		if (abs(dofVector[i]) < 1.8*h)
-			dofVector2[i]=dofVector[i];
-		else
-			dofVector2[i]=INT_MAX*sign(dofVector[i]);
-	}
-
-	return true;
-}
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run()
-{
-
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-	for(Index k = 0; k < Mesh.getDimensions().z(); k++)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-
-
-
-
-
-
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = 0; j < Mesh.getDimensions().y(); j++)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-	for(Index k = Mesh.getDimensions().z() -1; k > -1; k--)
-	{
-		for(Index i = 0; i < Mesh.getDimensions().x(); i++)
-		{
-			for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--)
-			{
-				updateValue(i,j,k);
-			}
-		}
-	}
-
-/*---------------------------------------------------------------------------------------------------------------------------*/
-
-
-	dofVector2.save("u-00001.tnl");
-
-	cout << "bla 3"<<endl;
-	return true;
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k)
-{
-	this->Entity.setCoordinates(CoordinatesType(i,j,k));
-	this->Entity.refresh();
-	tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity);
-	Real value = dofVector2[Entity.getIndex()];
-	Real a,b,c, tmp;
-
-	if( i == 0 )
-		a = dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()];
-	else if( i == Mesh.getDimensions().x() - 1 )
-		a = dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0 >()];
-	else
-	{
-		a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1,  0,  0>()],
-				 dofVector2[neighborEntities.template getEntityIndex< 1,  0,  0>()] );
-	}
-
-	if( j == 0 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()];
-	else if( j == Mesh.getDimensions().y() - 1 )
-		b = dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()];
-	else
-	{
-		b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  -1,  0>()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  1,  0>()] );
-	}
-
-	if( k == 0 )
-		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()];
-	else if( k == Mesh.getDimensions().z() - 1 )
-		c = dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()];
-	else
-	{
-		c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0,  0,  -1>()],
-				 dofVector2[neighborEntities.template getEntityIndex< 0,  0,  1>()] );
-	}
-
-	Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c);
-
-	if(hD < 0.0)
-		tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h;
-	else
-		tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) );
-
-
-	dofVector2[Entity.getIndex()]  = fabsMin(value, tmp);
-}
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y)
-{
-	Real fx = fabs(x);
-	Real fy = fabs(y);
-
-	Real tmpMin = Min(fx,fy);
-
-	if(tmpMin == fx)
-		return x;
-	else
-		return y;
-
-}
-
-
-
-#endif /* TNLNARROWBAND_IMPL_H_ */
diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h
deleted file mode 100644
index ca9b1da2cc6e26b14bc003532b6eea75e89d907d..0000000000000000000000000000000000000000
--- a/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/***************************************************************************
-                          tnlNarrowBand_CUDA.h  -  description
-                             -------------------
-    begin                : Oct 15 , 2015
-    copyright            : (C) 2015 by Tomas Sobotik
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
-#ifndef TNLNARROWBAND_H_
-#define TNLNARROWBAND_H_
-
-#include <TNL/Config/ParameterContainer.h>
-#include <TNL/Containers/Vector.h>
-#include <TNL/Containers/StaticVector.h>
-#include <TNL/Devices/Host.h>
-#include <mesh/tnlGrid.h>
-#include <mesh/grids/tnlGridEntity.h>
-
-#include <functions/tnlMeshFunction.h>
-#include <limits.h>
-#include <core/tnlDevice.h>
-#include <ctime>
-
-
-
-
-
-template< typename Mesh,
-		  typename Real,
-		  typename Index >
-class tnlNarrowBand
-{};
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 2, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-	tnlNarrowBand();
-
-        static String getType();
-	bool init( const Config::ParameterContainer& parameters );
-	bool run();
-#ifdef HAVE_CUDA
-   __device__ __host__
-#endif
-	RealType positivePart(const RealType arg) const;
-#ifdef HAVE_CUDA
-   __device__ __host__
-#endif
-	RealType negativePart(const RealType arg) const;
-
-#ifdef HAVE_CUDA
-	__device__ bool initGrid();
-	__device__ void updateValue(const Index i, const Index j);
-	__device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3);
-	__device__ Real fabsMin(const Real x, const Real y);
-
-	tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
-	double* cudaDofVector;
-	double* cudaDofVector2;
-	int* cudaStatusVector;
-	int counter;
-	int* reinitialize;
-	__device__ void setupSquare1000(Index i, Index j);
-	__device__ void setupSquare1100(Index i, Index j);
-	__device__ void setupSquare1010(Index i, Index j);
-	__device__ void setupSquare1001(Index i, Index j);
-	__device__ void setupSquare1110(Index i, Index j);
-	__device__ void setupSquare1101(Index i, Index j);
-	__device__ void setupSquare1011(Index i, Index j);
-	__device__ void setupSquare1111(Index i, Index j);
-	__device__ void setupSquare0000(Index i, Index j);
-	__device__ void setupSquare0100(Index i, Index j);
-	__device__ void setupSquare0010(Index i, Index j);
-	__device__ void setupSquare0001(Index i, Index j);
-	__device__ void setupSquare0110(Index i, Index j);
-	__device__ void setupSquare0101(Index i, Index j);
-	__device__ void setupSquare0011(Index i, Index j);
-	__device__ void setupSquare0111(Index i, Index j);
-#endif
-
-	MeshType Mesh;
-
-protected:
-
-	int statusGridSize;
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector;
-	DofVectorType data;
-
-
-	RealType h, tau, finalTime;
-
-
-};
-
-
-
-
-
-
-
-
-
-template< typename MeshReal,
-          typename Device,
-          typename MeshIndex,
-          typename Real,
-          typename Index >
-class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
-{
-
-public:
-	typedef Real RealType;
-	typedef Device DeviceType;
-	typedef Index IndexType;
-	typedef tnlGrid< 3, Real, Device, Index > MeshType;
-	typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType;
-	typedef typename MeshType::CoordinatesType CoordinatesType;
-
-
-
-	static String getType();
-	bool init( const Config::ParameterContainer& parameters );
-	bool run();
-
-#ifdef HAVE_CUDA
-	__device__ bool initGrid(int i, int j, int k);
-	__device__ void updateValue(const Index i, const Index j, const Index k);
-	__device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3);
-	__device__ Real fabsMin(const Real x, const Real y);
-
-	tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver;
-	double* cudaDofVector;
-	double* cudaDofVector2;
-	int counter;
-#endif
-
-	MeshType Mesh;
-
-protected:
-
-
-
-	bool exactInput;
-
-	tnlMeshFunction<MeshType> dofVector;
-	DofVectorType data;
-
-	RealType h;
-
-
-};
-
-
-
-
-
-
-
-#ifdef HAVE_CUDA
-//template<int sweep_t>
-__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-//__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i);
-
-__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-
-__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-__global__ void initSetupGrid1_2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver);
-__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau);
-//__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver);
-#endif
-
-
-
-#include "tnlNarrowBand2D_CUDA_v4_impl.h"
-//											#include "tnlNarrowBand3D_CUDA_impl.h"
-
-#endif /* TNLNARROWBAND_H_ */