diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 983bc5784b130835a146fff6d3fa278f226bbbb1..ee2a49b1eee6efd46f81dd0cb55c60a14a0d54f6 100755
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -4,3 +4,4 @@ add_subdirectory( navier-stokes )
 #add_subdirectory( hamilton-jacobi )
 add_subdirectory( hamilton-jacobi-parallel )
 add_subdirectory( fast-sweeping )
+add_subdirectory( hamilton-jacobi-parallel-map )
diff --git a/examples/hamilton-jacobi-parallel-map/CMakeLists.txt b/examples/hamilton-jacobi-parallel-map/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..6ef21c42cae891037ccdc1ad1423a3de0ab8401c
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/CMakeLists.txt
@@ -0,0 +1,22 @@
+set( tnl_hamilton_jacobi_parallel_map_SOURCES
+#     MainBuildConfig.h
+#     tnlParallelMapSolver2D_impl.h
+#     tnlParallelMapSolver.h
+#     parallelMapConfig.h 
+     main.cpp)
+
+
+IF( BUILD_CUDA )
+	CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel-map${debugExt} main.cu)
+ELSE( BUILD_CUDA )
+	ADD_EXECUTABLE(hamilton-jacobi-parallel-map${debugExt} main.cpp)
+ENDIF( BUILD_CUDA )
+target_link_libraries (hamilton-jacobi-parallel-map${debugExt} tnl${debugExt}-${tnlVersion} )
+
+
+INSTALL( TARGETS hamilton-jacobi-parallel-map${debugExt}
+         RUNTIME DESTINATION bin
+         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
+        
+#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_map_SOURCES}
+#         DESTINATION share/tnl-${tnlVersion}/examples/hamilton-jacobi-parallel-map )
diff --git a/examples/hamilton-jacobi-parallel-map/MainBuildConfig.h b/examples/hamilton-jacobi-parallel-map/MainBuildConfig.h
new file mode 100644
index 0000000000000000000000000000000000000000..1b904c0c8b1d096a72a6ee8c3cc3cd1979d164b4
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/MainBuildConfig.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+                          MainBuildConfig.h  -  description
+                             -------------------
+    begin                : Jul 7, 2014
+    copyright            : (C) 2014 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MAINBUILDCONFIG_H_
+#define MAINBUILDCONFIG_H_
+
+#include <solvers/tnlBuildConfigTags.h>
+
+class MainBuildConfig
+{
+   public:
+
+      static void print() { cerr << "MainBuildConfig" << endl; }
+};
+
+/****
+ * Turn off support for float and long double.
+ */
+template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; };
+template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; };
+
+/****
+ * Turn off support for short int and long int indexing.
+ */
+template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; };
+template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; };
+
+/****
+ * tnlGrid is enabled only for the allowed combinations of dimension and the Real, Device and Index types.
+ */
+template< int Dimensions, typename Real, typename Device, typename Index >
+   struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > >
+      { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled  &&
+                         tnlConfigTagReal< MainBuildConfig, Real >::enabled &&
+                         tnlConfigTagDevice< MainBuildConfig, Device >::enabled &&
+                         tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; };
+
+/****
+ * Please choose your preferred time discretisation here.
+ */
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; };
+template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
+
+/****
+ * Only the Runge-Kutta-Merson solver is enabled by default.
+ */
+template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; };
+
+#endif /* MAINBUILDCONFIG_H_ */
diff --git a/examples/hamilton-jacobi-parallel-map/main.cpp b/examples/hamilton-jacobi-parallel-map/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b13498e17330fae7bb00a0bdc2abcc7a19f8e7a8
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/main.cpp
@@ -0,0 +1,17 @@
+/***************************************************************************
+                          main.cpp  -  description
+                             -------------------
+    begin                : Jul 8 , 2014
+    copyright            : (C) 2014 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "main.h"
diff --git a/examples/hamilton-jacobi-parallel-map/main.cu b/examples/hamilton-jacobi-parallel-map/main.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7101976712e153d73c5f0979b211164a36ec648d
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/main.cu
@@ -0,0 +1,17 @@
+/***************************************************************************
+                          main.cu  -  description
+                             -------------------
+    begin                : Mar 30 , 2015
+    copyright            : (C) 2015 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "main.h"
diff --git a/examples/hamilton-jacobi-parallel-map/main.h b/examples/hamilton-jacobi-parallel-map/main.h
new file mode 100644
index 0000000000000000000000000000000000000000..43c2048920929d96e7787c7d4e365400dd88c972
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/main.h
@@ -0,0 +1,99 @@
+/***************************************************************************
+                          main.h  -  description
+                             -------------------
+    begin                : Mar 22 , 2016
+    copyright            : (C) 2016 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "tnlParallelMapSolver.h"
+#include "parallelMapConfig.h"
+#include "MainBuildConfig.h"
+#include <solvers/tnlBuildConfigTags.h>
+#include <operators/godunov-eikonal/parallelGodunovMap.h>
+#include <mesh/tnlGrid.h>
+#include <core/tnlDevice.h>
+#include <time.h>
+#include <ctime>
+
+typedef MainBuildConfig BuildConfig;
+
+int main( int argc, char* argv[] )
+{
+   time_t start;
+   time_t stop;
+   time( &start );
+   std::clock_t start2 = std::clock();
+   tnlParameterContainer parameters;
+   tnlConfigDescription configDescription;
+   parallelMapConfig< BuildConfig >::configSetup( configDescription );
+
+   if( ! parseCommandLine( argc, argv, configDescription, parameters ) )
+      return EXIT_FAILURE;
+
+
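+   // Note: the device used for the dispatch below is hard-coded to the host; the tnlCudaDevice branch further down is kept but is not reached unless this is changed.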
+   tnlDeviceEnum device;
+   device = tnlHostDevice;
+
+   const int& dim = parameters.getParameter< int >( "dim" );
+
+  if(dim == 2)
+  {
+
+	   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeHost;
+		/*#ifdef HAVE_CUDA
+		   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice;
+		#endif
+		#ifndef HAVE_CUDA*/
+	   typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeDevice;
+		/*#endif*/
+
+	   if(device==tnlHostDevice)
+	   {
+		   typedef tnlHost Device;
+
+
+		   tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
+		   if(!solver.init(parameters))
+		   {
+			   cerr << "Solver failed to initialize." << endl;
+			   return EXIT_FAILURE;
+		   }
+		   cout << "-------------------------------------------------------------" << endl;
+		   cout << "Starting solver loop..." << endl;
+		   solver.run();
+	   }
+	   else if(device==tnlCudaDevice )
+	   {
+		   typedef tnlCuda Device;
+		   //typedef parallelGodunovMapScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType;
+
+		   tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver;
+		   if(!solver.init(parameters))
+		   {
+			   cerr << "Solver failed to initialize." << endl;
+			   return EXIT_FAILURE;
+		   }
+		   cout << "-------------------------------------------------------------" << endl;
+		   cout << "Starting solver loop..." << endl;
+		   solver.run();
+	   }
+  // }
+  }
+
+
+   time(&stop);
+   cout << endl;
+   cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) << endl;
+   return EXIT_SUCCESS;
+}
+
+
diff --git a/examples/hamilton-jacobi-parallel-map/no-Makefile b/examples/hamilton-jacobi-parallel-map/no-Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..bfdc1ef236ca02ecfe6bc88f81d872e9524ec621
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/no-Makefile
@@ -0,0 +1,41 @@
+TNL_VERSION=0.1
+TNL_INSTALL_DIR=${HOME}/local/lib
+TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION}
+
+TARGET = hamiltonJacobiParallelSolver
+#CONFIG_FILE = $(TARGET).cfg.desc
+INSTALL_DIR = ${HOME}/local
+CXX = g++
+CUDA_CXX = nvcc
+OMP_FLAGS = -DHAVE_OPENMP -fopenmp
+CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG
+LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp
+
+SOURCES = main.cpp
+HEADERS = 
+OBJECTS = main.o
+DIST = $(SOURCES) Makefile
+
+all: $(TARGET)
+clean: 
+	rm -f $(OBJECTS)
+	rm -f $(TARGET)-conf.h	
+
+dist: $(DIST)
+	tar zcvf $(TARGET).tgz $(DIST) 
+
+install: $(TARGET)
+	cp $(TARGET) $(INSTALL_DIR)/bin
+	cp $(CONFIG_FILE) $(INSTALL_DIR)/share
+
+uninstall: $(TARGET)
+	rm -f $(INSTALL_DIR)/bin/$(TARGET) 
+	rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share
+
+$(TARGET): $(OBJECTS)
+	$(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS)
+
+%.o: %.cpp $(HEADERS)
+	$(CXX) -c -o $@ $(CXX_FLAGS) $<
+
+
diff --git a/examples/hamilton-jacobi-parallel-map/parallelMapConfig.h b/examples/hamilton-jacobi-parallel-map/parallelMapConfig.h
new file mode 100644
index 0000000000000000000000000000000000000000..e67859e9d6f7707d331099425dc534da67e0b1ef
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/parallelMapConfig.h
@@ -0,0 +1,47 @@
+/***************************************************************************
+                          parallelMapConfig.h  -  description
+                             -------------------
+    begin                : Mar 22 , 2016
+    copyright            : (C) 2016 by Tomas Sobotik
+    email                :
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_
+#define HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_
+
+#include <config/tnlConfigDescription.h>
+
+template< typename ConfigTag >
+class parallelMapConfig
+{
+   public:
+      static void configSetup( tnlConfigDescription& config )
+      {
+         config.addDelimiter( "Parallel Eikonal solver settings:" );
+         config.addEntry< tnlString >( "problem-name", "Name of the problem.", "hamilton-jacobi-parallel" );
+         config.addEntry< tnlString >( "scheme", "Scheme used for the space discretization.", "godunov" );
+         config.addEntryEnum( "godunov" );
+         config.addEntryEnum( "upwind" );
+         config.addRequiredEntry< tnlString >( "initial-condition", "Initial condition for the solver." );
+         config.addRequiredEntry< tnlString >( "map", "Gradient map for the solver." );
+         config.addEntry< tnlString >( "mesh", "Name of the mesh file.", "mesh.tnl" );
+         config.addEntry< double >( "epsilon", "Epsilon for the smoothing of sign().", 0.0 );
+         config.addEntry< double >( "delta", "Allowed difference on subgrid boundaries.", 0.0 );
+         config.addRequiredEntry< double >( "stop-time", "Final time for the solver." );
+         config.addRequiredEntry< double >( "initial-tau", "Initial tau for the solver." );
+         config.addEntry< double >( "cfl-condition", "CFL condition.", 0.0 );
+         config.addEntry< int >( "subgrid-size", "Subgrid size.", 16 );
+         config.addRequiredEntry< int >( "dim", "Dimension of the problem." );
+      }
+};
+
+#endif /* HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ */
diff --git a/examples/hamilton-jacobi-parallel-map/run b/examples/hamilton-jacobi-parallel-map/run
new file mode 100755
index 0000000000000000000000000000000000000000..3aece294a9c1189cd885acbe459dba20be713716
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/run
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+#GRID_SIZES="0897"
+GRID_SIZES="0008 0015 0029 0057 0113 0225 0449"
+#GRID_SIZES="1793"
+
+dimensions=2
+
+size=2
+
+time=3
+
+for grid_size in $GRID_SIZES;
+
+do
+
+	rm -r grid-${grid_size}
+   	mkdir grid-${grid_size}
+   	cd grid-${grid_size}
+
+	tnl-grid-setup --dimensions $dimensions \
+	               --origin-x -1.0 \
+	               --origin-y -1.0 \
+	               --origin-z -1.0 \
+	               --proportions-x $size \
+	               --proportions-y $size \
+	               --proportions-z $size \
+	               --size-x ${grid_size} \
+	               --size-y ${grid_size} \
+	               --size-z ${grid_size}
+
+	tnl-init --test-function sdf-para \
+	         --offset 0.25 \
+	         --output-file init.tnl \
+	         --final-time 0.0 \
+	         --snapshot-period 0.1
+
+
+	tnl-init --test-function sdf-para-sdf \
+	         --offset 0.25 \
+	         --output-file sdf.tnl \
+	         --final-time 0.0 \
+	         --snapshot-period 0.1
+
+	hamilton-jacobi-parallel --initial-condition init.tnl \
+	                         --cfl-condition 1.0e-1 \
+	                         --mesh mesh.tnl \
+	                         --initial-tau 1.0e-3 \
+	                         --epsilon 1.0 \
+	                         --delta 0.0 \
+	                         --stop-time $time \
+	                         --scheme godunov \
+	                         --subgrid-size 8
+
+	tnl-diff --mesh mesh.tnl --mode sequence --input-files sdf.tnl u-00001.tnl --write-difference yes --output-file ../${grid_size}.diff
+	
+	cd ..
+
+done
+
+
+./tnl-err2eoc-2.py --format txt --size $size *.diff
+
+              
diff --git a/examples/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py b/examples/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py
new file mode 100755
index 0000000000000000000000000000000000000000..f8cde3768e9b76156507e133f8bc3ecaa526fc71
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+import sys, string, math
+
+arguments = sys. argv[1:]
+format = "txt"
+output_file_name = "eoc-table.txt"
+input_files = []
+verbose = 1
+size = 1.0
+
+i = 0
+while i < len( arguments ):
+   if arguments[ i ] == "--format":
+      format = arguments[ i + 1 ]
+      i = i + 2
+      continue
+   if arguments[ i ] == "--output-file":
+      output_file_name = arguments[ i + 1 ]
+      i = i + 2
+      continue
+   if arguments[ i ] == "--verbose":
+       verbose = float( arguments[ i + 1 ] )
+       i = i +2
+       continue
+   if arguments[ i ] == "--size":
+       size = float( arguments[ i + 1 ] )
+       i = i +2
+       continue
+   input_files. append( arguments[ i ] )
+   i = i + 1
+
+if not verbose == 0:
+   print "Writing to " + output_file_name + " in " + format + "."
+
+h_list = []
+l1_norm_list = []
+l2_norm_list = []
+max_norm_list = []
+items = 0
+
+for file_name in input_files:
+   if not verbose == 0:
+       print "Processing file " + file_name
+   file = open( file_name, "r" )
+   
+   l1_max = 0.0
+   l_max_max = 0.0
+   file.readline();
+   file.readline();
+   for line in file. readlines():
+         data = string. split( line )
+         h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) )
+         l1_norm_list. append( float( data[ 1 ] ) )
+         l2_norm_list. append( float( data[ 2 ] ) )
+         max_norm_list. append( float( data[ 3 ] ) )
+         items = items + 1
+         if not verbose == 0:
+            print line
+   file. close()
+
+h_width = 12
+err_width = 15
+file = open( output_file_name, "w" )
+if format == "latex":
+      file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" )
+      file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" )
+      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" )
+      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" )
+      file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" )
+      file. write( " " + string. rjust( " ", h_width ) + "&" +
+                string. rjust( "Error", err_width ) + "&" +
+                string. rjust( "{\\bf EOC}", err_width ) + "&" +
+                string. rjust( "Error", err_width ) + "&" +
+                string. rjust( "{\\bf EOC}", err_width ) + "&" +
+                string. rjust( "Error.", err_width ) + "&" +
+                string. rjust( "{\\bf EOC}", err_width ) +
+                "\\\\ \\hline \\hline \n")
+if format == "txt":
+    file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
+    file. write( "|       h      |     L1 Err.    |     L1 EOC.    |     L2 Err.    |      L2 EOC    |    MAX Err.    |     MAX EOC    |\n" )
+    file. write( "+==============+================+================+================+================+================+================+\n" )
+                  
+
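+# EOC between successive grids: log(err_i / err_{i-1}) / log(h_i / h_{i-1}) for each norm.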
+i = 0
+while i < items:
+   if i == 0:
+      if format == "latex":
+         file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" +
+                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + 
+                      string. rjust( " ", err_width ) + "&"+ 
+                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" +
+                      string. rjust( " ", err_width ) + "&" +
+                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" +
+                      string. rjust( " ", err_width ) + "\\\\\n" )
+      if format == "txt":
+         file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + 
+                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" +
+                      string. rjust( " ", err_width ) + " |" +
+                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" +
+                      string. rjust( " ", err_width ) + " |" +
+                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" +
+                      string. rjust( " ", err_width ) + " |\n" )
+         file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
+      i = i + 1;
+      continue
+   if h_list[ i ] == h_list[ i - 1 ]:
+      print "Unable to count eoc since h[ " + \
+      str( i ) + " ] = h[ " + str( i - 1 ) + \
+      " ] = " + str( h_list[ i ] ) + ". \n"
+      file. write( " eoc error:  h[ " + \
+      str( i ) + " ] = h[ " + str( i - 1 ) + \
+      " ] = " + str( h_list[ i ] ) + ". \n" )
+   else:
+      h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] )
+      l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] )
+      l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] )
+      max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] )
+      if format == "latex":
+         file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" +
+                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" +
+                      string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" +
+                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" +
+                      string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" +
+                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" +
+                      string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" )
+      if format == "txt":
+         file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" +
+                      string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" +
+                      string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" +
+                      string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" +
+                      string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" +
+                      string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" +
+                      string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" )
+         file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" )
+   i = i + 1
+
+if format == "latex":
+   file. write( "\\hline \n" )
+   file. write( "\\end{tabular} \n" )
+    
diff --git a/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
new file mode 100644
index 0000000000000000000000000000000000000000..5c3c5ca2fd6f3cc1a8ea34431d66d1742bc1554c
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h
@@ -0,0 +1,365 @@
+/***************************************************************************
+                          tnlParallelMapSolver.h  -  description
+                             -------------------
+    begin                : Mar 22 , 2016
+    copyright            : (C) 2016 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TNLPARALLELMAPSOLVER_H_
+#define TNLPARALLELMAPSOLVER_H_
+
+#include <config/tnlParameterContainer.h>
+#include <core/vectors/tnlVector.h>
+#include <core/vectors/tnlStaticVector.h>
+#include <functions/tnlMeshFunction.h>
+#include <core/tnlHost.h>
+#include <mesh/tnlGrid.h>
+#include <mesh/grids/tnlGridEntity.h>
+#include <limits.h>
+#include <core/tnlDevice.h>
+
+
+#include <ctime>
+
+#ifdef HAVE_CUDA
+#include <cuda.h>
+#include <core/tnlCuda.h>
+#endif
+
+
+template< int Dimension,
+		  typename SchemeHost,
+		  typename SchemeDevice,
+		  typename Device,
+		  typename RealType = double,
+          typename IndexType = int >
+class tnlParallelMapSolver
+{};
+
+template<typename SchemeHost, typename SchemeDevice, typename Device>
+class tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >
+{
+public:
+
+	typedef SchemeDevice SchemeTypeDevice;
+	typedef SchemeHost SchemeTypeHost;
+	typedef Device DeviceType;
+	typedef tnlVector< double, tnlHost, int > VectorType;
+	typedef tnlVector< int, tnlHost, int > IntVectorType;
+	typedef tnlGrid< 2, double, tnlHost, int > MeshType;
+#ifdef HAVE_CUDA
+	typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
+	typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
+	typedef tnlGrid< 2, double, tnlHost, int > MeshTypeCUDA;
+#endif
+	tnlParallelMapSolver();
+	bool init( const tnlParameterContainer& parameters );
+	void run();
+
+	void test();
+
+/*private:*/
+
+
+	void synchronize();
+
+	int getOwner( int i) const;
+
+	int getSubgridValue( int i ) const;
+
+	void setSubgridValue( int i, int value );
+
+	int getBoundaryCondition( int i ) const;
+
+	void setBoundaryCondition( int i, int value );
+
+	void stretchGrid();
+
+	void contractGrid();
+
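+	// getSubgrid/insertSubgrid copy subgrid i out of / into the stretched work vector; runSubgrid evolves one subgrid under the given boundary-condition mask.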
+	VectorType getSubgrid( const int i ) const;
+
+	void insertSubgrid( VectorType u, const int i );
+
+	VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID);
+
+
+	tnlMeshFunction<MeshType> u0;
+	VectorType work_u, map_stretched, map;
+	IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
+	MeshType mesh, subMesh;
+
+//	tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity;
+
+	SchemeHost schemeHost;
+	SchemeDevice schemeDevice;
+	double delta, tau0, stopTime,cflCondition;
+	int gridRows, gridCols, gridLevels, currentStep, n;
+
+	std::clock_t start;
+	double time_diff;
+
+
+	tnlDeviceEnum device;
+
+	tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
+	{
+		return this;
+	};
+
+#ifdef HAVE_CUDA
+
+	tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;
+
+	double* work_u_cuda;
+	double* map_stretched_cuda;
+
+	int* subgridValues_cuda;
+	int* boundaryConditions_cuda;
+	int* unusedCell_cuda;
+	int* calculationsCount_cuda;
+	double* tmpw;
+	double* tmp_map;
+	//MeshTypeCUDA mesh_cuda, subMesh_cuda;
+	//SchemeDevice scheme_cuda;
+	//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
+	//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;
+
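+	// Device-side flag saying whether another sweep is needed; its value is copied back into run_host on the CPU side.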
+	int* runcuda;
+	int run_host;
+
+
+	__device__ void getSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
+
+	__device__ void updateSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
+
+	__device__ void insertSubgridCUDA2D( double u, const int i );
+
+	__device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID);
+
+	/*__global__ void runCUDA();*/
+
+	//__device__ void synchronizeCUDA();
+
+	__device__ int getOwnerCUDA2D( int i) const;
+
+	__device__ int getSubgridValueCUDA2D( int i ) const;
+
+	__device__ void setSubgridValueCUDA2D( int i, int value );
+
+	__device__ int getBoundaryConditionCUDA2D( int i ) const;
+
+	__device__ void setBoundaryConditionCUDA2D( int i, int value );
+
+	//__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
+
+	/*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int >* caller);*/
+
+#endif
+
+};
+
+
+
+
+
+
+
+	template<typename SchemeHost, typename SchemeDevice, typename Device>
+	class tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >
+	{
+	public:
+
+		typedef SchemeDevice SchemeTypeDevice;
+		typedef SchemeHost SchemeTypeHost;
+		typedef Device DeviceType;
+		typedef tnlVector< double, tnlHost, int > VectorType;
+		typedef tnlVector< int, tnlHost, int > IntVectorType;
+		typedef tnlGrid< 3, double, tnlHost, int > MeshType;
+	#ifdef HAVE_CUDA
+		typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
+		typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
+		typedef tnlGrid< 3, double, tnlHost, int > MeshTypeCUDA;
+	#endif
+		tnlParallelMapSolver();
+		bool init( const tnlParameterContainer& parameters );
+		void run();
+
+		void test();
+
+	/*private:*/
+
+
+		void synchronize();
+
+		int getOwner( int i) const;
+
+		int getSubgridValue( int i ) const;
+
+		void setSubgridValue( int i, int value );
+
+		int getBoundaryCondition( int i ) const;
+
+		void setBoundaryCondition( int i, int value );
+
+		void stretchGrid();
+
+		void contractGrid();
+
+		VectorType getSubgrid( const int i ) const;
+
+		void insertSubgrid( VectorType u, const int i );
+
+		VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID);
+
+
+		tnlMeshFunction<MeshType> u0;
+		VectorType work_u;
+		IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
+		MeshType mesh, subMesh;
+		SchemeHost schemeHost;
+		SchemeDevice schemeDevice;
+		double delta, tau0, stopTime,cflCondition;
+		int gridRows, gridCols, gridLevels, currentStep, n;
+
+		std::clock_t start;
+		double time_diff;
+
+
+		tnlDeviceEnum device;
+
+		tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
+		{
+			return this;
+		};
+
+#ifdef HAVE_CUDA
+
+	tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;
+
+	double* work_u_cuda;
+
+	int* subgridValues_cuda;
+	int* boundaryConditions_cuda;
+	int* unusedCell_cuda;
+	int* calculationsCount_cuda;
+	double* tmpw;
+	//MeshTypeCUDA mesh_cuda, subMesh_cuda;
+	//SchemeDevice scheme_cuda;
+	//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
+	//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;
+
+	int* runcuda;
+	int run_host;
+
+
+	__device__ void getSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
+
+	__device__ void updateSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);
+
+	__device__ void insertSubgridCUDA3D( double u, const int i );
+
+	__device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID);
+
+	/*__global__ void runCUDA();*/
+
+	//__device__ void synchronizeCUDA();
+
+	__device__ int getOwnerCUDA3D( int i) const;
+
+	__device__ int getSubgridValueCUDA3D( int i ) const;
+
+	__device__ void setSubgridValueCUDA3D( int i, int value );
+
+	__device__ int getBoundaryConditionCUDA3D( int i ) const;
+
+	__device__ void setBoundaryConditionCUDA3D( int i, int value );
+
+	//__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
+
+	/*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int >* caller);*/
+
+#endif
+
+};
+
+
+
+
+
+
+#ifdef HAVE_CUDA
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void runCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void initRunCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void initCUDA2D( tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3, double* tmp_map_ptr);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
+
+
+
+
+
+
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void runCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void initRunCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void initCUDA3D( tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void synchronizeCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__ void synchronize2CUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
+#endif
+
+
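+// Returns the argument with the smaller absolute value (keeping its sign).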
+__device__
+double fabsMin( double x, double y)
+{
+	double fx = abs(x);
+
+	if(Min(fx,abs(y)) == fx)
+		return x;
+	else
+		return y;
+}
+
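+// Atomically replaces *address with whichever of its current value and val has the smaller absolute value, using a CAS loop on the double's bit pattern.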
+__device__
+double atomicFabsMin(double* address, double val)
+{
+	unsigned long long int* address_as_ull =
+						  (unsigned long long int*)address;
+	unsigned long long int old = *address_as_ull, assumed;
+	do {
+		assumed = old;
+			old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) ));
+	} while (assumed != old);
+	return __longlong_as_double(old);
+}
+
+
+#include "tnlParallelMapSolver2D_impl.h"
+#endif /* TNLPARALLELMAPSOLVER_H_ */
diff --git a/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..cc611b950163b1a77309f54ff0a629b306fd54e5
--- /dev/null
+++ b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h
@@ -0,0 +1,1957 @@
+/***************************************************************************
+                          tnlParallelMapSolver2D_impl.h  -  description
+                             -------------------
+    begin                : Mar 22 , 2016
+    copyright            : (C) 2016 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TNLPARALLELMAPSOLVER2D_IMPL_H_
+#define TNLPARALLELMAPSOLVER2D_IMPL_H_
+
+
+#include "tnlParallelMapSolver.h"
+#include <core/mfilename.h>
+
+
+
+
+#define MAP_SOLVER_MAX_VALUE 150
+
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver()
+{
+	this->device = tnlCudaDevice;  // tnlCudaDevice --- compute on the GPU, tnlHostDevice --- compute on the CPU
+
+#ifdef HAVE_CUDA
+	if(this->device == tnlCudaDevice)
+	{
+		run_host = 1;
+	}
+#endif
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test()
+{
+/*
+	for(int i =0; i < this->subgridValues.getSize(); i++ )
+	{
+		insertSubgrid(getSubgrid(i), i);
+	}
+*/
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+
+bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const tnlParameterContainer& parameters )
+{
+	cout << "Initializing solver..." << endl;
+	const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh");
+	this->mesh.load( meshLocation );
+
+	this->n = parameters.getParameter <int>("subgrid-size");
+	cout << "Setting N to " << this->n << endl;
+
+	this->subMesh.setDimensions( this->n, this->n );
+	this->subMesh.setDomain( tnlStaticVector<2,double>(0.0, 0.0),
+							 tnlStaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) );
+
+	this->subMesh.save("submesh.tnl");
+
+	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+	this->u0.load( initialCondition );
+
+	/* LOAD MAP */
+	const tnlString& mapFile = parameters.getParameter <tnlString>("map");
+	if(! this->map.load( mapFile ))
+		cout << "Failed to load map file : " << mapFile << endl;
+
+	//cout << this->mesh.getCellCenter(0) << endl;
+
+	this->delta = parameters.getParameter <double>("delta");
+	this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >();
+
+	cout << "Setting delta to " << this->delta << endl;
+
+	this->tau0 = parameters.getParameter <double>("initial-tau");
+	cout << "Setting initial tau to " << this->tau0 << endl;
+	this->stopTime = parameters.getParameter <double>("stop-time");
+
+	this->cflCondition = parameters.getParameter <double>("cfl-condition");
+	this->cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >());
+	cout << "Setting CFL to " << this->cflCondition << endl;
+
+	stretchGrid();
+	this->stopTime /= (double)(this->gridCols);
+	this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0));
+	cout << "Setting stopping time to " << this->stopTime << endl;
+	//this->stopTime = 1.5*((double)(this->n))*parameters.getParameter <double>("stop-time")*this->mesh.template getSpaceStepsProducts< 1, 0 >();
+	//cout << "Setting stopping time to " << this->stopTime << endl;
+
+	cout << "Initializing scheme..." << endl;
+	if(!this->schemeHost.init(parameters))
+	{
+		cerr << "SchemeHost failed to initialize." << endl;
+		return false;
+	}
+	cout << "Scheme initialized." << endl;
+
+	test();
+
+	VectorType* tmp = new VectorType[subgridValues.getSize()];
+	bool containsCurve = false;
+
+#ifdef HAVE_CUDA
+
+	if(this->device == tnlCudaDevice)
+	{
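+	// Allocate device-side copies of the solver object, the work vector, the stretched map and the run flag, then initialize them in the single-thread kernel below.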
+	/*cout << "Testing... " << endl;
+	if(this->device == tnlCudaDevice)
+	{
+	if( !initCUDA2D(parameters, gridRows, gridCols) )
+		return false;
+	}*/
+		//cout << "s" << endl;
+	cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >));
+	//cout << "s" << endl;
+	cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice);
+	//cout << "s" << endl;
+	double** tmpdev = NULL;
+	cudaMalloc(&tmpdev, sizeof(double*));
+	//double* tmpw;
+	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
+	cudaMalloc(&(this->tmp_map), this->map_stretched.getSize()*sizeof(double));
+	cudaMalloc(&(this->runcuda), sizeof(int));
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+	int* tmpUC;
+	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
+	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
+
+	initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC, tmp_map);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+	//cout << "s " << endl;
+	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
+	double* tmpu = NULL;
+
+	cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
+	//printf("%p %p \n",tmpu,tmpw);
+	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
+	cudaMemcpy((this->tmp_map), this->map_stretched.getData(), this->map_stretched.getSize()*sizeof(double), cudaMemcpyHostToDevice);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
+	//cout << "s "<< endl;
+
+	}
+#endif
+
+	if(this->device == tnlHostDevice)
+	{
+	for(int i = 0; i < this->subgridValues.getSize(); i++)
+	{
+
+		if(! tmp[i].setSize(this->n * this->n))
+			cout << "Could not allocate tmp["<< i <<"] array." << endl;
+		tmp[i] = getSubgrid(i);
+		containsCurve = false;
+
+		for(int j = 0; j < tmp[i].getSize(); j++)
+		{
+			if(tmp[i][0]*tmp[i][j] <= 0.0)
+			{
+				containsCurve = true;
+				j=tmp[i].getSize();
+			}
+
+		}
+		if(containsCurve)
+		{
+			//cout << "Computing initial SDF on subgrid " << i << "." << endl;
+			insertSubgrid(runSubgrid(0, tmp[i],i), i);
+			setSubgridValue(i, 4);
+			//cout << "Computed initial SDF on subgrid " << i  << "." << endl;
+		}
+		containsCurve = false;
+
+	}
+	}
+#ifdef HAVE_CUDA
+	else if(this->device == tnlCudaDevice)
+	{
+//		cout << "pre 1 kernel" << endl;
+		cudaDeviceSynchronize();
+		checkCudaDevice;
+		dim3 threadsPerBlock(this->n, this->n);
+		dim3 numBlocks(this->gridCols,this->gridRows);
+		cudaDeviceSynchronize();
+		checkCudaDevice;
+		initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
+		cudaDeviceSynchronize();
+//		cout << "post 1 kernel" << endl;
+
+	}
+#endif
+
+
+	this->currentStep = 1;
+	if(this->device == tnlHostDevice)
+		synchronize();
+#ifdef HAVE_CUDA
+	else if(this->device == tnlCudaDevice)
+	{
+		dim3 threadsPerBlock(this->n, this->n);
+		dim3 numBlocks(this->gridCols,this->gridRows);
+		//double * test = (double*)malloc(this->work_u.getSize()*sizeof(double));
+		//cout << test[0] <<"   " << test[1] <<"   " << test[2] <<"   " << test[3] << endl;
+		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+		//cout << this->tmpw << "   " <<  test[0] <<"   " << test[1] << "   " <<test[2] << "   " <<test[3] << endl;
+
+		checkCudaDevice;
+
+		synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
+		cudaDeviceSynchronize();
+		checkCudaDevice;
+		synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
+		cudaDeviceSynchronize();
+		checkCudaDevice;
+		//cout << test[0] << "   " <<test[1] <<"   " << test[2] << "   " <<test[3] << endl;
+		//cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+		//checkCudaDevice;
+		//cout << this->tmpw << "   " <<  test[0] << "   " <<test[1] << "   " <<test[2] <<"   " << test[3] << endl;
+		//free(test);
+
+	}
+
+#endif
+	delete[] tmp;
+	cout << "Solver initialized." << endl;
+
+	return true;
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run()
+{
+	if(this->device == tnlHostDevice)
+	{
+
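+	// Sweep over all subgrids until no boundary updates remain; the end flag forces one extra sweep after the boundary conditions clear.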
+	bool end = false;
+	while ((this->boundaryConditions.max() > 0 ) || !end)
+	{
+		if(this->boundaryConditions.max() == 0 )
+			end=true;
+		else
+			end=false;
+#ifdef HAVE_OPENMP
+#pragma omp parallel for num_threads(4) schedule(dynamic)
+#endif
+		for(int i = 0; i < this->subgridValues.getSize(); i++)
+		{
+			if(getSubgridValue(i) != INT_MAX)
+			{
+				//cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) << endl;
+
+				if(getSubgridValue(i) == currentStep+4)
+				{
+
+				if(getBoundaryCondition(i) & 1)
+				{
+					insertSubgrid( runSubgrid(1, getSubgrid(i),i), i);
+					this->calculationsCount[i]++;
+				}
+				if(getBoundaryCondition(i) & 2)
+				{
+					insertSubgrid( runSubgrid(2, getSubgrid(i),i), i);
+					this->calculationsCount[i]++;
+				}
+				if(getBoundaryCondition(i) & 4)
+				{
+					insertSubgrid( runSubgrid(4, getSubgrid(i),i), i);
+					this->calculationsCount[i]++;
+				}
+				if(getBoundaryCondition(i) & 8)
+				{
+					insertSubgrid( runSubgrid(8, getSubgrid(i),i), i);
+					this->calculationsCount[i]++;
+				}
+				}
+
+				if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 1)//)
+					/*	&&(!(getBoundaryCondition(i) & 5) && !(getBoundaryCondition(i) & 10)) */)
+				{
+					//cout << "3 @ " << getBoundaryCondition(i) << endl;
+					insertSubgrid( runSubgrid(3, getSubgrid(i),i), i);
+				}
+				if( ((getBoundaryCondition(i) & 4) )|| (getBoundaryCondition(i) & 1)//)
+					/*	&&(!(getBoundaryCondition(i) & 3) && !(getBoundaryCondition(i) & 12)) */)
+				{
+					//cout << "5 @ " << getBoundaryCondition(i) << endl;
+					insertSubgrid( runSubgrid(5, getSubgrid(i),i), i);
+				}
+				if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 8)//)
+					/*	&&(!(getBoundaryCondition(i) & 12) && !(getBoundaryCondition(i) & 3))*/ )
+				{
+					//cout << "10 @ " << getBoundaryCondition(i) << endl;
+					insertSubgrid( runSubgrid(10, getSubgrid(i),i), i);
+				}
+				if(   ((getBoundaryCondition(i) & 4) )|| (getBoundaryCondition(i) & 8)//)
+					/*&&(!(getBoundaryCondition(i) & 10) && !(getBoundaryCondition(i) & 5)) */)
+				{
+					//cout << "12 @ " << getBoundaryCondition(i) << endl;
+					insertSubgrid( runSubgrid(12, getSubgrid(i),i), i);
+				}
+
+
+				/*if(getBoundaryCondition(i))
+				{
+					insertSubgrid( runSubgrid(15, getSubgrid(i),i), i);
+				}*/
+
+				setBoundaryCondition(i, 0);
+
+				setSubgridValue(i, getSubgridValue(i)-1);
+
+			}
+		}
+		synchronize();
+	}
+	}
+#ifdef HAVE_CUDA
+	else if(this->device == tnlCudaDevice)
+	{
+		//cout << "fn" << endl;
+		bool end_cuda = false;
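+		// One CUDA block per subgrid, one thread per subgrid cell; the shared-memory size gives three n x n double arrays per block.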
+		dim3 threadsPerBlock(this->n, this->n);
+		dim3 numBlocks(this->gridCols,this->gridRows);
+		cudaDeviceSynchronize();
+		checkCudaDevice;
+		//cudaMalloc(&runcuda,sizeof(bool));
+		//cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice);
+		//cout << "fn" << endl;
+		bool* tmpb;
+		//cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost);
+		//cudaDeviceSynchronize();
+		//checkCudaDevice;
+		cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost);
+		cudaDeviceSynchronize();
+		checkCudaDevice;
+		//cout << "fn" << endl;
+		int i = 1;
+		time_diff = 0.0;
+		while (run_host || !end_cuda)
+		{
+			cout << "Computing at step "<< i++ << endl;
+			if(run_host != 0 )
+				end_cuda = true;
+			else
+				end_cuda = false;
+			//cout << "a" << endl;
+			cudaDeviceSynchronize();
+			checkCudaDevice;
+			start = std::clock();
+			runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver);
+			//cout << "a" << endl;
+			cudaDeviceSynchronize();
+			time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
+
+			//start = std::clock();
+			synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver);
+			cudaDeviceSynchronize();
+			checkCudaDevice;
+			synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver);
+			cudaDeviceSynchronize();
+			checkCudaDevice;
+			//time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC);
+
+
+			//cout << "a" << endl;
+			//run_host = false;
+			//cout << "in kernel loop" << run_host << endl;
+			//cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost);
+			cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost);
+			//cout << "in kernel loop" << run_host << endl;
+		}
+		cout << "Solving time was: " << time_diff << endl;
+		//cout << "b" << endl;
+
+		//double* tmpu;
+		//cudaMemcpy(tmpu, &(cudaSolver->work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice);
+		//cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+		//cout << this->work_u.getData()[0] << endl;
+
+		//double * test = (double*)malloc(this->work_u.getSize()*sizeof(double));
+		//cout << test[0] << test[1] << test[2] << test[3] << endl;
+		cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost);
+		//cout << this->tmpw << "   " <<  test[0] << test[1] << test[2] << test[3] << endl;
+		//free(test);
+
+		cudaDeviceSynchronize();
+	}
+#endif
+	contractGrid();
+	this->u0.save("u-00001.tnl");
+	cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() << endl;
+	cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) << endl;
+	cout << "Solver finished" << endl;
+
+#ifdef HAVE_CUDA
+	if(this->device == tnlCudaDevice)
+	{
+		cudaFree(this->runcuda);
+		cudaFree(this->tmpw);
+		cudaFree(this->tmp_map);
+		cudaFree(this->cudaSolver);
+	}
+#endif
+
+}
+
+//north - 1, east - 2, west - 4, south - 8
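+// synchronize() alternates row-wise and column-wise sweeps (by the parity of currentStep) and copies the value of smaller magnitude across each subgrid boundary, marking the receiving subgrid for recomputation.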
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() // note: this routine was repeatedly suspected of needing a fix; it is believed to be correct now
+{
+	cout << "Synchronizing..." << endl;
+	int tmp1, tmp2;
+	int grid1, grid2;
+
+	if(this->currentStep & 1)
+	{
+		for(int j = 0; j < this->gridRows - 1; j++)
+		{
+			for (int i = 0; i < this->gridCols*this->n; i++)
+			{
+				tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i;
+				tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i;
+				grid1 = getSubgridValue(getOwner(tmp1));
+				grid2 = getSubgridValue(getOwner(tmp2));
+				if(getOwner(tmp1)==getOwner(tmp2))
+					cout << "Warning: boundary cells share an owner at i, j: " << i << ", " << j << endl;
+				if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
+				{
+					this->work_u[tmp2] = this->work_u[tmp1];
+					this->unusedCell[tmp2] = 0;
+					if(grid2 == INT_MAX)
+					{
+						setSubgridValue(getOwner(tmp2), -INT_MAX);
+					}
+					if(! (getBoundaryCondition(getOwner(tmp2)) & 8) )
+						setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8);
+				}
+				else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
+				{
+					this->work_u[tmp1] = this->work_u[tmp2];
+					this->unusedCell[tmp1] = 0;
+					if(grid1 == INT_MAX)
+					{
+						setSubgridValue(getOwner(tmp1), -INT_MAX);
+					}
+					if(! (getBoundaryCondition(getOwner(tmp1)) & 1) )
+						setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1);
+				}
+			}
+		}
+
+	}
+	else
+	{
+		for(int i = 1; i < this->gridCols; i++)
+		{
+			for (int j = 0; j < this->gridRows*this->n; j++)
+			{
+				tmp1 = this->gridCols*this->n*j + i*this->n - 1;
+				tmp2 = this->gridCols*this->n*j + i*this->n ;
+				grid1 = getSubgridValue(getOwner(tmp1));
+				grid2 = getSubgridValue(getOwner(tmp2));
+				if(getOwner(tmp1)==getOwner(tmp2))
+					cout << "Warning: boundary cells share an owner at i, j: " << i << ", " << j << endl;
+				if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
+				{
+					this->work_u[tmp2] = this->work_u[tmp1];
+					this->unusedCell[tmp2] = 0;
+					if(grid2 == INT_MAX)
+					{
+						setSubgridValue(getOwner(tmp2), -INT_MAX);
+					}
+					if(! (getBoundaryCondition(getOwner(tmp2)) & 4) )
+						setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4);
+				}
+				else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
+				{
+					this->work_u[tmp1] = this->work_u[tmp2];
+					this->unusedCell[tmp1] = 0;
+					if(grid1 == INT_MAX)
+					{
+						setSubgridValue(getOwner(tmp1), -INT_MAX);
+					}
+					if(! (getBoundaryCondition(getOwner(tmp1)) & 2) )
+						setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2);
+				}
+			}
+		}
+	}
+
+
+	this->currentStep++;
+	int stepValue = this->currentStep + 4;
+	for (int i = 0; i < this->subgridValues.getSize(); i++)
+	{
+		if( getSubgridValue(i) == -INT_MAX )
+			setSubgridValue(i, stepValue);
+	}
+
+	cout << "Grid synchronized at step " << (this->currentStep - 1 ) << endl;
+
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const
+{
+
+	return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n;
+}
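+// Illustration of the owner-index arithmetic above (the values are examples only):
+// with n = 4 and gridCols = 3 the stretched grid is 12 cells wide, so cell i = 50 lies
+// in global row 50/12 = 4 and column 50%12 = 2, i.e. in subgrid row 1, column 0, and
+// getOwner(50) = (50/48)*3 + (50%12)/4 = 1*3 + 0 = 3, which is row*gridCols + column.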
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const
+{
+	return this->subgridValues[i];
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value)
+{
+	this->subgridValues[i] = value;
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const
+{
+	return this->boundaryConditions[i];
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value)
+{
+	this->boundaryConditions[i] = value;
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid()
+{
+	cout << "Stretching grid..." << endl;
+
+
+	this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) );
+	this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) );
+
+	//this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ;
+	//this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ;
+
+	cout << "Setting gridCols to " << this->gridCols << "." << endl;
+	cout << "Setting gridRows to " << this->gridRows << "." << endl;
+
+	this->subgridValues.setSize(this->gridCols*this->gridRows);
+	this->subgridValues.setValue(0);
+	this->boundaryConditions.setSize(this->gridCols*this->gridRows);
+	this->boundaryConditions.setValue(0);
+	this->calculationsCount.setSize(this->gridCols*this->gridRows);
+	this->calculationsCount.setValue(0);
+
+	for(int i = 0; i < this->subgridValues.getSize(); i++ )
+	{
+		this->subgridValues[i] = INT_MAX;
+		this->boundaryConditions[i] = 0;
+	}
+
+	int stretchedSize = this->n*this->n*this->gridCols*this->gridRows;
+
+	if(!this->work_u.setSize(stretchedSize))
+		cerr << "Could not allocate memory for stretched grid." << endl;
+	if(!this->map_stretched.setSize(stretchedSize))
+		cerr << "Could not allocate memory for stretched map." << endl;
+	if(!this->unusedCell.setSize(stretchedSize))
+		cerr << "Could not allocate memory for supporting stretched grid." << endl;
+	int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1);
+	cout << idealStretch << endl;
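+	// The loop below copies u0 and map into the stretched (padded) arrays: for each
+	// stretched index i, the offset k counts the padding cells inserted beyond the
+	// original mesh, so that i - k is the corresponding index into u0 and map; cells
+	// that fall inside the padding map back onto nearby cells of the original mesh
+	// (which cell exactly follows from the two corrections applied to k below).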
+
+	for(int i = 0; i < stretchedSize; i++)
+	{
+		this->unusedCell[i] = 1;
+		int diff =(this->n*this->gridCols) - idealStretch ;
+		//cout << "diff = " << diff <<endl;
+		int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff;
+
+		if(i%(this->n*this->gridCols) - idealStretch  >= 0)
+		{
+			//cout << i%(this->n*this->gridCols) - idealStretch +1 << endl;
+			k+= i%(this->n*this->gridCols) - idealStretch +1 ;
+		}
+
+		if(i/(this->n*this->gridCols) - idealStretch + 1  > 0)
+		{
+			//cout << i/(this->n*this->gridCols) - idealStretch + 1  << endl;
+			k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ;
+		}
+
+		//cout << "i = " << i << " : i-k = " << i-k << endl;
+		/*int j=(i % (this->n*this->gridCols)) - ( (this->mesh.getDimensions().x() - this->n)/(this->n - 1) + this->mesh.getDimensions().x() - 1)
+				+ (this->n*this->gridCols - this->mesh.getDimensions().x())*(i/(this->n*this->n*this->gridCols)) ;
+
+		if(j > 0)
+			k += j;
+
+		int l = i-k - (this->u0.getSize() - 1);
+		int m = (l % this->mesh.getDimensions().x());
+
+		if(l>0)
+			k+= l + ( (l / this->mesh.getDimensions().x()) + 1 )*this->mesh.getDimensions().x() - (l % this->mesh.getDimensions().x());*/
+
+		this->work_u[i] = this->u0[i-k];
+///**/cout << "bla "  << i << " " << this->map_stretched.getSize() << " " << i-k << " " << this->map.getData().getSize() << endl;
+		this->map_stretched[i] = this->map[i-k];
+///**/cout << "bla "  << i << " " << this->map_stretched.getSize() << " " << i-k << " " << this->map.getData().getSize() << endl;
+		//cout << (i-k) <<endl;
+	}
+
+
+	cout << "Grid stretched." << endl;
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid()
+{
+	cout << "Contracting grid..." << endl;
+	int stretchedSize = this->n*this->n*this->gridCols*this->gridRows;
+
+	int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1);
+	cout << idealStretch << endl;
+
+	for(int i = 0; i < stretchedSize; i++)
+	{
+		int diff =(this->n*this->gridCols) - idealStretch ;
+		int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff;
+
+		if((i%(this->n*this->gridCols) - idealStretch  < 0) && (i/(this->n*this->gridCols) - idealStretch + 1  <= 0))
+		{
+			//cout << i <<" : " <<i-k<< endl;
+			this->u0[i-k] = this->work_u[i];
+		}
+
+	}
+
+	cout << "Grid contracted" << endl;
+}
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType
+tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const
+{
+	VectorType u;
+	u.setSize(this->n*this->n);
+
+	for( int j = 0; j < u.getSize(); j++)
+	{
+		u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols
+		                     + (i % this->gridCols) * this->n
+		                     + (j/this->n) * this->n*this->gridCols
+		                     + (j % this->n) ];
+	}
+	return u;
+}
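+// Index layout used by getSubgrid/insertSubgrid (example values for illustration only):
+// with n = 4 and gridCols = 3, subgrid i = 4 sits in subgrid row 1, column 1, so its
+// local cell j = 5 (local row 1, column 1) maps to global row 5, column 5 of the
+// stretched grid, i.e. index 1*48 + 1*4 + 1*12 + 1 = 65 = 5*12 + 5.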
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i )
+{
+
+	for( int j = 0; j < this->n*this->n; j++)
+	{
+		int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n);
+		//OMP LOCK index
+		if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) )
+		{
+			this->work_u[index] = u[j];
+			this->unusedCell[index] = 0;
+		}
+		//OMP UNLOCK index
+	}
+}
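+// insertSubgrid merges a computed subgrid back into work_u with a "keep the smaller
+// absolute value" rule (or overwrite if the target cell has never been written), the
+// same rule applied on the GPU in updateSubgridCUDA2D/insertSubgridCUDA2D below.
+// The OMP LOCK markers above indicate where per-cell locking would be needed if this
+// merge were ever run from several OpenMP threads at once.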
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType
+tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID)
+{
+
+	VectorType fu;
+
+	fu.setLike(u);
+	fu.setValue( 0.0 );
+
+/*
+ *          Insert Euler-Solver Here
+ */
+
+	/**/
+
+	/*for(int i = 0; i < u.getSize(); i++)
+	{
+		int x = this->subMesh.getCellCoordinates(i).x();
+		int y = this->subMesh.getCellCoordinates(i).y();
+
+		if(x == 0 && (boundaryCondition & 4) && y ==0)
+		{
+			if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0)
+			{
+				//cout << "x = 0; y = 0" << endl;
+				u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >();
+			}
+		}
+		else if(x == 0 && (boundaryCondition & 4) && y == subMesh.getDimensions().y() - 1)
+		{
+			if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0)
+			{
+				//cout << "x = 0; y = n" << endl;
+				u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >();
+			}
+		}
+
+
+		else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y ==0)
+		{
+			if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0)
+			{
+				//cout << "x = n; y = 0" << endl;
+				u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >();
+			}
+		}
+		else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y == subMesh.getDimensions().y() - 1)
+		{
+			if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0)
+			{
+				//cout << "x = n; y = n" << endl;
+				u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >();
+			}
+		}
+
+
+		else if(y == 0 && (boundaryCondition & 8) && x ==0)
+		{
+			if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0)
+			{
+				//cout << "y = 0; x = 0" << endl;
+				u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >();
+			}
+		}
+		else if(y == 0 && (boundaryCondition & 8) && x == subMesh.getDimensions().x() - 1)
+		{
+			if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0)
+			{
+				//cout << "y = 0; x = n" << endl;
+				u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >();
+			}
+		}
+
+
+		else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x ==0)
+		{
+			if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0)			{
+				//cout << "y = n; x = 0" << endl;
+				u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >();
+			}
+		}
+		else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x == subMesh.getDimensions().x() - 1)
+		{
+			if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0)
+			{
+				//cout << "y = n; x = n" << endl;
+				u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >();
+			}
+		}
+	}*/
+
+	/**/
+
+
+/*	bool tmp = false;
+	for(int i = 0; i < u.getSize(); i++)
+	{
+		if(u[0]*u[i] <= 0.0)
+			tmp=true;
+	}
+
+
+	if(tmp)
+	{}
+	else if(boundaryCondition == 4)
+	{
+		int i;
+		for(i = 0; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i))
+		{
+			int j;
+			for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j))
+			{
+				u[j] = u[i];
+			}
+			u[j] = u[i];
+		}
+		int j;
+		for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j))
+		{
+			u[j] = u[i];
+		}
+		u[j] = u[i];
+	}
+	else if(boundaryCondition == 8)
+	{
+		int i;
+		for(i = 0; i < subMesh.getDimensions().x() - 1; i=subMesh.getCellXSuccessor(i))
+		{
+			int j;
+			for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j))
+			{
+				u[j] = u[i];
+			}
+			u[j] = u[i];
+		}
+		int j;
+		for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j))
+		{
+			u[j] = u[i];
+		}
+		u[j] = u[i];
+
+	}
+	else if(boundaryCondition == 2)
+	{
+		int i;
+		for(i = subMesh.getDimensions().x() - 1; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i))
+		{
+			int j;
+			for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j))
+			{
+				u[j] = u[i];
+			}
+			u[j] = u[i];
+		}
+		int j;
+		for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j))
+		{
+			u[j] = u[i];
+		}
+		u[j] = u[i];
+	}
+	else if(boundaryCondition == 1)
+	{
+		int i;
+		for(i = (subMesh.getDimensions().y() - 1)*subMesh.getDimensions().x(); i < u.getSize() - 1; i=subMesh.getCellXSuccessor(i))
+		{
+			int j;
+			for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j))
+			{
+				u[j] = u[i];
+			}
+			u[j] = u[i];
+		}
+		int j;
+		for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j))
+		{
+			u[j] = u[i];
+		}
+		u[j] = u[i];
+	}
+*/
+	/**/
+
+
+
+	bool tmp = false;
+	// index of the centre cell of this subgrid in the stretched grid
+	// (subgrid row = subGridID / gridCols, column = subGridID % gridCols, as in getSubgrid)
+	int centreGID = (this->n*(subGridID / this->gridCols) + (this->n >> 1))*(this->n*this->gridCols)
+	              + this->n*(subGridID % this->gridCols) + (this->n >> 1);
+	if(this->unusedCell[centreGID] == 0 || boundaryCondition == 0)
+		tmp = true;
+	for(int i = 0; i < u.getSize(); i++)
+	{
+		if(u[0]*u[i] <= 0.0)
+			tmp = true;
+	}
+	//if(this->currentStep + 3 < getSubgridValue(subGridID))
+		//tmp = true;
+
+
+	double value = Sign(u[0]) * u.absMax();
+
+	if(tmp)
+	{}
+
+
+	//north - 1, east - 2, west - 4, south - 8
+	else if(boundaryCondition == 4)
+	{
+		for(int i = 0; i < this->n; i++)
+			for(int j = 1;j < this->n; j++)
+				//if(fabs(u[i*this->n + j]) <  fabs(u[i*this->n]))
+				u[i*this->n + j] = value;// u[i*this->n];
+	}
+	else if(boundaryCondition == 2)
+	{
+		for(int i = 0; i < this->n; i++)
+			for(int j =0 ;j < this->n -1; j++)
+				//if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1]))
+				u[i*this->n + j] = value;// u[(i+1)*this->n - 1];
+	}
+	else if(boundaryCondition == 1)
+	{
+		for(int j = 0; j < this->n; j++)
+			for(int i = 0;i < this->n - 1; i++)
+				//if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)]))
+				u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)];
+	}
+	else if(boundaryCondition == 8)
+	{
+		for(int j = 0; j < this->n; j++)
+			for(int i = 1;i < this->n; i++)
+				//if(fabs(u[i*this->n + j]) < fabs(u[j]))
+				u[i*this->n + j] = value;// u[j];
+	}
+
+/*
+
+	else if(boundaryCondition == 5)
+	{
+		for(int i = 0; i < this->n - 1; i++)
+			for(int j = 1;j < this->n; j++)
+				//if(fabs(u[i*this->n + j]) <  fabs(u[i*this->n]))
+				u[i*this->n + j] = value;// u[i*this->n];
+	}
+	else if(boundaryCondition == 10)
+	{
+		for(int i = 1; i < this->n; i++)
+			for(int j =0 ;j < this->n -1; j++)
+				//if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1]))
+				u[i*this->n + j] = value;// u[(i+1)*this->n - 1];
+	}
+	else if(boundaryCondition == 3)
+	{
+		for(int j = 0; j < this->n - 1; j++)
+			for(int i = 0;i < this->n - 1; i++)
+				//if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)]))
+				u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)];
+	}
+	else if(boundaryCondition == 12)
+	{
+		for(int j = 1; j < this->n; j++)
+			for(int i = 1;i < this->n; i++)
+				//if(fabs(u[i*this->n + j]) < fabs(u[j]))
+				u[i*this->n + j] = value;// u[j];
+	}
+*/
+
+
+	/**/
+
+	/*if (u.max() > 0.0)
+		this->stopTime *=(double) this->gridCols;*/
+
+
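+   // What follows is a simple explicit Euler integration of du/dt = F(u): each step
+   // evaluates the scheme's right-hand side into fu, picks the time step
+   // tau = cflCondition / max|fu| (capped by the spatial step and by the remaining
+   // time to stopTime), and updates u <- u + tau * fu until finalTime is reached.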
+   double time = 0.0;
+   double currentTau = this->tau0;
+   double finalTime = this->stopTime;// + 3.0*(u.max() - u.min());
+   if( time + currentTau > finalTime ) currentTau = finalTime - time;
+
+   double maxResidue( 1.0 );
+   //double lastResidue( 10000.0 );
+   tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
+   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+   while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0 >()*/)
+   {
+      /****
+       * Compute the RHS
+       */
+
+      for( int i = 0; i < fu.getSize(); i ++ )
+      {
+			Entity.setCoordinates(tnlStaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()));
+			Entity.refresh();
+			neighbourEntities.refresh(subMesh,Entity.getIndex());
+    	  fu[ i ] = schemeHost.getValue( this->subMesh, i, tnlStaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighbourEntities);
+      }
+      maxResidue = fu.absMax();
+
+
+      if( this->cflCondition * maxResidue != 0.0)
+    	  currentTau = this->cflCondition / maxResidue;
+
+     /* if (maxResidue < 0.05)
+    	  cout << "Max < 0.05" << endl;*/
+      if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >())
+      {
+    	  //cout << currentTau << " >= " << 2.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >() << endl;
+    	  currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >();
+      }
+      /*if(maxResidue > lastResidue)
+    	  currentTau *=(1.0/10.0);*/
+
+
+      if( time + currentTau > finalTime ) currentTau = finalTime - time;
+//      for( int i = 0; i < fu.getSize(); i ++ )
+//      {
+//    	  //cout << "Too big RHS! i = " << i << ", fu = " << fu[i] << ", u = " << u[i] << endl;
+//    	  if((u[i]+currentTau * fu[ i ])*u[i] < 0.0 && fu[i] != 0.0 && u[i] != 0.0 )
+//    		  currentTau = fabs(u[i]/(2.0*fu[i]));
+//
+//      }
+
+
+      for( int i = 0; i < fu.getSize(); i ++ )
+      {
+    	  double add = u[i] + currentTau * fu[ i ];
+    	  //if( fabs(u[i]) < fabs(add) or (this->subgridValues[subGridID] == this->currentStep +4) )
+    		  u[ i ] = add;
+      }
+      time += currentTau;
+
+      //cout << '\r' << flush;
+     //cout << maxResidue << "   " << currentTau << " @ " << time << flush;
+     //lastResidue = maxResidue;
+   }
+   //cout << "Time: " << time << ", Res: " << maxResidue <<endl;
+	/*if (u.max() > 0.0)
+		this->stopTime /=(double) this->gridCols;*/
+
+	VectorType solution;
+	solution.setLike(u);
+    for( int i = 0; i < u.getSize(); i ++ )
+  	{
+    	solution[i]=u[i];
+   	}
+	return solution;
+}
+
+
+#ifdef HAVE_CUDA
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
+{
+	//int j = threadIdx.x + threadIdx.y * blockDim.x;
+	int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols
+            + (blockIdx.x) * caller->n
+            + threadIdx.y * caller->n*caller->gridCols
+            + threadIdx.x;
+	//printf("i= %d,j= %d,th= %d\n",i,j,th);
+	*a = caller->work_u_cuda[th];
+	//printf("Hi %f \n", *a);
+	//return ret;
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
+{
+//	int j = threadIdx.x + threadIdx.y * blockDim.x;
+	int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols
+            + (blockIdx.x) * caller->n
+            + threadIdx.y * caller->n*caller->gridCols
+            + threadIdx.x;
+
+	if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) )
+	{
+		caller->work_u_cuda[index] = *a;
+		caller->unusedCell_cuda[index] = 0;
+
+	}
+
+	*a = caller->work_u_cuda[index];
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i )
+{
+
+
+//	int j = threadIdx.x + threadIdx.y * blockDim.x;
+	//printf("j = %d, u = %f\n", j,u);
+
+		int index = (blockIdx.y)*this->n*this->n*this->gridCols
+					+ (blockIdx.x)*this->n
+					+ threadIdx.y*this->n*this->gridCols
+					+ threadIdx.x;
+
+		//printf("i= %d,j= %d,index= %d\n",i,j,index);
+		if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) )
+		{
+			this->work_u_cuda[index] = u;
+			this->unusedCell_cuda[index] = 0;
+
+		}
+
+
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID)
+{
+
+	__shared__ int tmp;
+	__shared__ double value;
+	//double tmpRes = 0.0;
+	volatile double* sharedTau = &u[blockDim.x*blockDim.y];
+	double* map_local = &u[2*blockDim.x*blockDim.y];
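+	// Shared-memory layout assumed by this kernel: u occupies the first blockDim.x*blockDim.y
+	// doubles of the dynamically allocated buffer, sharedTau the next blockDim.x*blockDim.y,
+	// and map_local the third block, so the launch configuration has to request at least
+	// 3*blockDim.x*blockDim.y*sizeof(double) bytes of dynamic shared memory.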
+
+	int i = threadIdx.x;
+	int j = threadIdx.y;
+	int l = threadIdx.y * blockDim.x + threadIdx.x;
+	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
+
+	/* LOAD MAP */
+	int index = (blockIdx.y) * this->n*this->n*this->gridCols
+            + (blockIdx.x) * this->n
+            + threadIdx.y * this->n*this->gridCols
+            + threadIdx.x;
+	map_local[l]=this->map_stretched_cuda[index];
+	/* LOADED */
+
+	bool computeFU = !((i == 0 && (boundaryCondition & 4)) or
+			 (i == blockDim.x - 1 && (boundaryCondition & 2)) or
+			 (j == 0 && (boundaryCondition & 8)) or
+			 (j == blockDim.y - 1  && (boundaryCondition & 1)));
+
+	if(l == 0)
+	{
+		tmp = 0;
+		int centreGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1);
+		if(this->unusedCell_cuda[centreGID] == 0 || boundaryCondition == 0)
+			tmp = 1;
+	}
+	__syncthreads();
+
+	/*if(!tmp && (u[0]*u[l] <= 0.0))
+		atomicMax( &tmp, 1);*/
+
+	__syncthreads();
+	if(tmp !=1)
+	{
+//		if(computeFU)
+//			absVal[l]=0.0;
+//		else
+//			absVal[l] = fabs(u[l]);
+//
+//		__syncthreads();
+//
+//	      if((blockDim.x == 16) && (l < 128))		absVal[l] = Max(absVal[l],absVal[l+128]);
+//	      __syncthreads();
+//	      if((blockDim.x == 16) && (l < 64))		absVal[l] = Max(absVal[l],absVal[l+64]);
+//	      __syncthreads();
+//	      if(l < 32)    							absVal[l] = Max(absVal[l],absVal[l+32]);
+//	      if(l < 16)								absVal[l] = Max(absVal[l],absVal[l+16]);
+//	      if(l < 8)									absVal[l] = Max(absVal[l],absVal[l+8]);
+//	      if(l < 4)									absVal[l] = Max(absVal[l],absVal[l+4]);
+//	      if(l < 2)									absVal[l] = Max(absVal[l],absVal[l+2]);
+//	      if(l < 1)									value   = Sign(u[0])*Max(absVal[l],absVal[l+1]);
+//		__syncthreads();
+//
+//		if(computeFU)
+//			u[l] = value;
+		if(computeFU)
+		{
+			if(boundaryCondition == 4)
+				u[l] = u[threadIdx.y * blockDim.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+  2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n);
+			else if(boundaryCondition == 2)
+				u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n);
+			else if(boundaryCondition == 8)
+				u[l] = u[threadIdx.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n);
+			else if(boundaryCondition == 1)
+				u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y  - 1 +this->n);
+		}
+	}
+
+   double time = 0.0;
+   __shared__ double currentTau;
+   double cfl = this->cflCondition;
+   double fu = 0.0;
+//   if(threadIdx.x * threadIdx.y == 0)
+//   {
+//	   currentTau = finalTime;
+//   }
+   double finalTime = this->stopTime;
+   __syncthreads();
+//   if( time + currentTau > finalTime ) currentTau = finalTime - time;
+
+   tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh);
+   tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity);
+   Entity.setCoordinates(tnlStaticVector<2,int>(i,j));
+   Entity.refresh();
+   neighbourEntities.refresh(subMesh,Entity.getIndex());
+
+
+	if(map_local[l] == 0.0)
+	{
+		u[l] = /*Sign(u[l])**/MAP_SOLVER_MAX_VALUE;
+		computeFU = false;
+	}
+	__syncthreads();
+
+
+   while( time < finalTime )
+   {
+	  sharedTau[l] = finalTime;
+
+	  if(computeFU)
+	  {
+		  fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighbourEntities, map_local);
+	  	  sharedTau[l] = fabs(cfl/fu);
+	  }
+
+
+
+      if(l == 0)
+      {
+    	  if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >())	sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >();
+      }
+      else if(l == blockDim.x*blockDim.y - 1)
+    	  if( time + sharedTau[l] > finalTime )		sharedTau[l] = finalTime - time;
+
+
+//      if(  (Sign(u[l]+sharedTau[l]*fu) != Sign(u[l])) && fu != 0.0 && fu != -0.0)
+//    	  {
+//    	  printf("orig: %10f", sharedTau[l]);
+//    	  sharedTau[l]=abs(u[l]/(1.1*fu)) ;
+//    	  printf("   new: %10f\n", sharedTau[l]);
+//    	  }
+
+
+
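+      // Tree min-reduction over sharedTau to obtain one common time step for the whole
+      // block; it assumes blockDim.x*blockDim.y is a power of two no larger than 256
+      // (at most 16x16 subgrids) and relies on sharedTau being volatile for the
+      // warp-sized tail, so no __syncthreads() is issued once fewer than 32 threads remain.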
+      if((blockDim.x == 16) && (l < 128))		sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]);
+      __syncthreads();
+      if((blockDim.x == 16) && (l < 64))		sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]);
+      __syncthreads();
+      if(l < 32)    							sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]);
+      if(l < 16)								sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]);
+      if(l < 8)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]);
+      if(l < 4)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]);
+      if(l < 2)									sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]);
+      if(l < 1)									currentTau   = Min(sharedTau[l],sharedTau[l+1]);
+      __syncthreads();
+
+      if(computeFU)
+    	  u[l] += currentTau * fu;
+//      if(abs(u[l]) > MAP_SOLVER_MAX_VALUE)
+//      {
+//  		u[l] = /*Sign(u[l])**/MAP_SOLVER_MAX_VALUE;
+//  		computeFU = false;
+//      }
+      time += currentTau;
+   }
+
+
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const
+{
+
+	return ((i / (this->gridCols*this->n*this->n))*this->gridCols
+			+ (i % (this->gridCols*this->n))/this->n);
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const
+{
+	return this->subgridValues_cuda[i];
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value)
+{
+	this->subgridValues_cuda[i] = value;
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const
+{
+	return this->boundaryConditions_cuda[i];
+}
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__device__
+void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value)
+{
+	this->boundaryConditions_cuda[i] = value;
+}
+
+
+
+//north - 1, east - 2, west - 4, south - 8
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__
+void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) // TODO: revisit -- the seam synchronization may still need work
+{
+
+	__shared__ int boundary[4]; // north,east,west,south
+	__shared__ int subgridValue;
+	__shared__ int newSubgridValue;
+
+
+	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
+	double u = cudaSolver->work_u_cuda[gid];
+	double u_cmp;
+	int subgridValue_cmp=INT_MAX;
+	int boundary_index=0;
+
+
+	if(threadIdx.x+threadIdx.y == 0)
+	{
+		subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x);
+		boundary[0] = 0;
+		boundary[1] = 0;
+		boundary[2] = 0;
+		boundary[3] = 0;
+		newSubgridValue = 0;
+		//printf("%d   %d\n", blockDim.x, gridDim.x);
+	}
+	__syncthreads();
+
+
+
+	if(		(threadIdx.x == 0 				/*				&& !(cudaSolver->currentStep & 1)*/) 		||
+			(threadIdx.y == 0 				 	/*			&& (cudaSolver->currentStep & 1)*/) 		||
+			(threadIdx.x == blockDim.x - 1 	 /*	&& !(cudaSolver->currentStep & 1)*/) 		||
+			(threadIdx.y == blockDim.y - 1 	 /*	&& (cudaSolver->currentStep & 1)*/) 		)
+	{
+		if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/)
+		{
+			u_cmp = cudaSolver->work_u_cuda[gid - 1];
+			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1);
+			boundary_index = 2;
+		}
+
+		if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/)
+		{
+			u_cmp = cudaSolver->work_u_cuda[gid + 1];
+			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1);
+			boundary_index = 1;
+		}
+
+		__threadfence();
+		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
+		{
+			cudaSolver->unusedCell_cuda[gid] = 0;
+			atomicMax(&newSubgridValue, INT_MAX);
+			atomicMax(&boundary[boundary_index], 1);
+			cudaSolver->work_u_cuda[gid] = u_cmp;
+			u=u_cmp;
+		}
+		__threadfence();
+		if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/)
+		{
+			u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x];
+			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x);
+			boundary_index = 3;
+		}
+		if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/)
+		{
+			u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x];
+			subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x);
+			boundary_index = 0;
+		}
+
+//		__threadfence();
+		if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX))
+		{
+			cudaSolver->unusedCell_cuda[gid] = 0;
+			atomicMax(&newSubgridValue, INT_MAX);
+			atomicMax(&boundary[boundary_index], 1);
+			cudaSolver->work_u_cuda[gid] = u_cmp;
+		}
+	}
+	__threadfence();
+	__syncthreads();
+
+	if(threadIdx.x+threadIdx.y == 0)
+	{
+		if(subgridValue == INT_MAX && newSubgridValue !=0)
+			cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX);
+
+		cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, 	boundary[0] +
+																				2 * boundary[1] +
+																				4 * boundary[2] +
+																				8 * boundary[3]);
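+		// boundary[0..3] hold the per-side flags in the order north, east, west, south,
+		// so the weighted sum above reproduces the same bitmask encoding (1/2/4/8) that
+		// the host-side synchronize() stores via setBoundaryCondition().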
+
+
+		if(blockIdx.x+blockIdx.y ==0)
+		{
+			cudaSolver->currentStep = cudaSolver->currentStep + 1;
+			*(cudaSolver->runcuda) = 0;
+		}
+//
+//		int stepValue = cudaSolver->currentStep + 4;
+//		if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
+//				cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue);
+//
+//		atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x));
+	}
+
+
+	/*
+	//printf("I am not an empty kernel!\n");
+	//cout << "Synchronizig..." << endl;
+	int tmp1, tmp2;
+	int grid1, grid2;
+
+	if(cudaSolver->currentStep & 1)
+	{
+		//printf("I am not an empty kernel! 1\n");
+		for(int j = 0; j < cudaSolver->gridRows - 1; j++)
+		{
+			//printf("I am not an empty kernel! 3\n");
+			for (int i = 0; i < cudaSolver->gridCols*cudaSolver->n; i++)
+			{
+				tmp1 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n-1)+j*cudaSolver->n) + i;
+				tmp2 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n)+j*cudaSolver->n) + i;
+				grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1));
+				grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2));
+
+				if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
+				{
+					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
+					cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1];
+					cudaSolver->unusedCell_cuda[tmp2] = 0;
+					if(grid2 == INT_MAX)
+					{
+						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX);
+					}
+					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 8) )
+						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+8);
+				}
+				else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
+				{
+					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
+					cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2];
+					cudaSolver->unusedCell_cuda[tmp1] = 0;
+					if(grid1 == INT_MAX)
+					{
+						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX);
+					}
+					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 1) )
+						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+1);
+				}
+			}
+		}
+
+	}
+	else
+	{
+		//printf("I am not an empty kernel! 2\n");
+		for(int i = 1; i < cudaSolver->gridCols; i++)
+		{
+			//printf("I am not an empty kernel! 4\n");
+			for (int j = 0; j < cudaSolver->gridRows*cudaSolver->n; j++)
+			{
+
+				tmp1 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n - 1;
+				tmp2 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n ;
+				grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1));
+				grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2));
+
+				if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX))
+				{
+					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
+					cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1];
+					cudaSolver->unusedCell_cuda[tmp2] = 0;
+					if(grid2 == INT_MAX)
+					{
+						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX);
+					}
+					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 4) )
+						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+4);
+				}
+				else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX))
+				{
+					//printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2));
+					cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2];
+					cudaSolver->unusedCell_cuda[tmp1] = 0;
+					if(grid1 == INT_MAX)
+					{
+						cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX);
+					}
+					if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 2) )
+						cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+2);
+				}
+			}
+		}
+	}
+	//printf("I am not an empty kernel! 5 cudaSolver->currentStep : %d \n", cudaSolver->currentStep);
+
+	cudaSolver->currentStep = cudaSolver->currentStep + 1;
+	int stepValue = cudaSolver->currentStep + 4;
+	for (int i = 0; i < cudaSolver->gridRows * cudaSolver->gridCols; i++)
+	{
+		if( cudaSolver->getSubgridValueCUDA2D(i) == -INT_MAX )
+			cudaSolver->setSubgridValueCUDA2D(i, stepValue);
+	}
+
+	int maxi = 0;
+	for(int q=0; q < cudaSolver->gridRows*cudaSolver->gridCols;q++)
+	{
+		//printf("%d : %d\n", q, cudaSolver->boundaryConditions_cuda[q]);
+		maxi=Max(maxi,cudaSolver->getBoundaryConditionCUDA2D(q));
+	}
+	//printf("I am not an empty kernel! %d\n", maxi);
+	*(cudaSolver->runcuda) = (maxi > 0);
+	//printf("I am not an empty kernel! 7 %d\n", cudaSolver->boundaryConditions_cuda[0]);
+	//cout << "Grid synchronized at step " << (this->currentStep - 1 ) << endl;
+*/
+}
+
+
+
+template <typename SchemeHost, typename SchemeDevice, typename Device>
+__global__
+void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver)
+{
+//	if(blockIdx.x+blockIdx.y ==0)
+//	{
+//		cudaSolver->currentStep = cudaSolver->currentStep + 1;
+//		*(cudaSolver->runcuda) = 0;
+//	}
+
+	int stepValue = cudaSolver->currentStep + 4;
+	if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX )
+			cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue);
+
+	atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x));
+}
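+// The device-side synchronization is split into two kernels, presumably because there is
+// no reliable grid-wide barrier inside a single launch: synchronizeCUDA2D exchanges values
+// across subgrid edges, advances currentStep and resets *runcuda, and only after the
+// kernel boundary does synchronize2CUDA2D stamp freshly activated subgrids with
+// currentStep + 4 and raise *runcuda again via atomicMax over the boundary bitmasks.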
+
+
+
+
+
+
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device>
+__global__
+void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr)
+{
+	//cout << "Initializating solver..." << endl;
+	//const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh");
+	//this->mesh_cuda.load( meshLocation );
+
+	//this->n_cuda = parameters.getParameter <int>("subgrid-size");
+	//cout << "Setting N << this->n_cuda << endl;
+
+	//this->subMesh_cuda.setDimensions( this->n_cuda, this->n_cuda );
+	//this->subMesh_cuda.setDomain( tnlStaticVector<2,double>(0.0, 0.0),
+							 //tnlStaticVector<2,double>(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n_cuda), this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n_cuda)) );
+
+	//this->subMesh_cuda.save("submesh.tnl");
+
+//	const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition");
+//	this->u0.load( initialCondition );
+
+	//cout << this->mesh.getCellCenter(0) << endl;
+
+	//this->delta_cuda = parameters.getParameter <double>("delta");
+	//this->delta_cuda *= this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >();
+
+	//cout << "Setting delta to " << this->delta << endl;
+
+	//this->tau0_cuda = parameters.getParameter <double>("initial-tau");
+	//cout << "Setting initial tau to " << this->tau0_cuda << endl;
+	//this->stopTime_cuda = parameters.getParameter <double>("stop-time");
+
+	//this->cflCondition_cuda = parameters.getParameter <double>("cfl-condition");
+	//this -> cflCondition_cuda *= sqrt(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >());
+	//cout << "Setting CFL to " << this->cflCondition << endl;
+////
+////
+
+//	this->gridRows_cuda = gridRows;
+//	this->gridCols_cuda = gridCols;
+
+	cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double));
+	cudaSolver->map_stretched_cuda = tmp_map_ptr;
+	cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
+	cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
+	cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
+	cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool));
+	*(cudaSolver->runcuda) = 1;
+	cudaSolver->currentStep = 1;
+	//cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost);
+	//ptr = cudaSolver->work_u_cuda;
+	printf("GPU memory allocated.\n");
+
+	for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++)
+	{
+		cudaSolver->subgridValues_cuda[i] = INT_MAX;
+		cudaSolver->boundaryConditions_cuda[i] = 0;
+	}
+
+	/*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++)
+	{
+		printf("%d\n",j);
+		cudaSolver->unusedCell_cuda[ j] = 1;
+	}*/
+	printf("GPU memory initialized.\n");
+
+
+	//cudaSolver->work_u_cuda[50] = 32.153438;
+////
+////
+	//stretchGrid();
+	//this->stopTime_cuda /= (double)(this->gridCols_cuda);
+	//this->stopTime_cuda *= (1.0+1.0/((double)(this->n_cuda) - 1.0));
+	//cout << "Setting stopping time to " << this->stopTime << endl;
+	//this->stopTime_cuda = 1.5*((double)(this->n_cuda))*parameters.getParameter <double>("stop-time")*this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >();
+	//cout << "Setting stopping time to " << this->stopTime << endl;
+
+	//cout << "Initializating scheme..." << endl;
+	//if(!this->schemeDevice.init(parameters))
+//	{
+		//cerr << "Scheme failed to initialize." << endl;
+//		return false;
+//	}
+	//cout << "Scheme initialized." << endl;
+
+	//test();
+
+//	this->currentStep_cuda = 1;
+	//return true;
+}
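+// Note: subgridValues_cuda and boundaryConditions_cuda above are allocated with in-kernel
+// malloc, i.e. from the device heap; for larger gridCols*gridRows this may require enlarging
+// the heap before the launch, e.g. (host-side, sketch only):
+//
+//   cudaDeviceSetLimit( cudaLimitMallocHeapSize, 64 * 1024 * 1024 );  // 64 MB device heap
+//
+// otherwise the malloc calls can return NULL and the initialization loop above would
+// dereference an invalid pointer.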
+
+
+
+
+//extern __shared__ double array[];
+template< typename SchemeHost, typename SchemeDevice, typename Device >
+__global__
+void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller)
+
+{
+
+
+	extern __shared__ double u[];
+	//printf("%p\n",caller->work_u_cuda);
+
+	int i = blockIdx.y * gridDim.x + blockIdx.x;
+	int l = threadIdx.y * blockDim.x + threadIdx.x;
+
+	__shared__ int containsCurve;
+	if(l == 0)
+		containsCurve = 0;
+
+	//double a;
+	caller->getSubgridCUDA2D(i,caller, &u[l]);
+	//printf("%f   %f\n",a , u[l]);
+	//u[l] = a;
+	//printf("Hi %f \n", u[l]);
+	__syncthreads();
+	//printf("hurewrwr %f \n", u[l]);
+	if(u[0] * u[l] <= 0.0)
+	{
+		//printf("contains %d \n",i);
+		atomicMax( &containsCurve, 1);
+	}
+
+	__syncthreads();
+	//printf("hu");
+	//printf("%d : %f\n", l, u[l]);
+	if(containsCurve == 1)
+	{
+		//printf("have curve \n");
+		caller->runSubgridCUDA2D(0,u,i);
+		//printf("%d : %f\n", l, u[l]);
+		__syncthreads();
+		caller->insertSubgridCUDA2D(u[l],i);
+		__syncthreads();
+		if(l == 0)
+			caller->setSubgridValueCUDA2D(i, 4);
+	}
+
+
+}
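+// Launch sketch for the kernels above (illustrative only -- the actual launch happens
+// earlier in this file): one CUDA block per subgrid with n*n threads, and enough dynamic
+// shared memory for the three per-block arrays used by runSubgridCUDA2D, e.g.
+//
+//   dim3 threadsPerBlock( this->n, this->n );
+//   dim3 numBlocks( this->gridCols, this->gridRows );
+//   initRunCUDA2D< SchemeHost, SchemeDevice, Device >
+//      <<< numBlocks, threadsPerBlock, 3*this->n*this->n*sizeof(double) >>>( this->cudaSolver );
+//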
+
+
+
+
+
+template< typename SchemeHost, typename SchemeDevice, typename Device >
+__global__
+void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller)
+{
+	extern __shared__ double u[];
+	int i = blockIdx.y * gridDim.x + blockIdx.x;
+	int l = threadIdx.y * blockDim.x + threadIdx.x;
+	int bound = caller->getBoundaryConditionCUDA2D(i);
+
+	if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0)
+	{
+		caller->getSubgridCUDA2D(i,caller, &u[l]);
+
+		//if(l == 0)
+			//printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA2D(i));
+		if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4)
+		{
+			if(bound & 1)
+			{
+				caller->runSubgridCUDA2D(1,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+			if(bound & 2 )
+			{
+				caller->runSubgridCUDA2D(2,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+			if(bound & 4)
+			{
+				caller->runSubgridCUDA2D(4,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+			if(bound & 8)
+			{
+				caller->runSubgridCUDA2D(8,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+
+
+
+
+
+			if( ((bound & 3 )))
+				{
+					caller->runSubgridCUDA2D(3,u,i);
+					//__syncthreads();
+					//caller->insertSubgridCUDA2D(u[l],i);
+					//__syncthreads();
+					//caller->getSubgridCUDA2D(i,caller, &u[l]);
+					caller->updateSubgridCUDA2D(i,caller, &u[l]);
+					__syncthreads();
+				}
+				if( ((bound & 5 )))
+				{
+					caller->runSubgridCUDA2D(5,u,i);
+					//__syncthreads();
+					//caller->insertSubgridCUDA2D(u[l],i);
+					//__syncthreads();
+					//caller->getSubgridCUDA2D(i,caller, &u[l]);
+					caller->updateSubgridCUDA2D(i,caller, &u[l]);
+					__syncthreads();
+				}
+				if( ((bound & 10 )))
+				{
+					caller->runSubgridCUDA2D(10,u,i);
+					//__syncthreads();
+					//caller->insertSubgridCUDA2D(u[l],i);
+					//__syncthreads();
+					//caller->getSubgridCUDA2D(i,caller, &u[l]);
+					caller->updateSubgridCUDA2D(i,caller, &u[l]);
+					__syncthreads();
+				}
+				if(   (bound & 12 ))
+				{
+					caller->runSubgridCUDA2D(12,u,i);
+					//__syncthreads();
+					//caller->insertSubgridCUDA2D(u[l],i);
+					//__syncthreads();
+					//caller->getSubgridCUDA2D(i,caller, &u[l]);
+					caller->updateSubgridCUDA2D(i,caller, &u[l]);
+					__syncthreads();
+				}
+
+
+
+
+
+		}
+
+
+		else
+		{
+
+
+
+
+
+
+
+
+
+			if( ((bound == 2)))
+						{
+							caller->runSubgridCUDA2D(2,u,i);
+							//__syncthreads();
+							//caller->insertSubgridCUDA2D(u[l],i);
+							//__syncthreads();
+							//caller->getSubgridCUDA2D(i,caller, &u[l]);
+							caller->updateSubgridCUDA2D(i,caller, &u[l]);
+							__syncthreads();
+						}
+						if( ((bound == 1) ))
+						{
+							caller->runSubgridCUDA2D(1,u,i);
+							//__syncthreads();
+							//caller->insertSubgridCUDA2D(u[l],i);
+							//__syncthreads();
+							//caller->getSubgridCUDA2D(i,caller, &u[l]);
+							caller->updateSubgridCUDA2D(i,caller, &u[l]);
+							__syncthreads();
+						}
+						if( ((bound == 8) ))
+						{
+							caller->runSubgridCUDA2D(8,u,i);
+							//__syncthreads();
+							//caller->insertSubgridCUDA2D(u[l],i);
+							//__syncthreads();
+							//caller->getSubgridCUDA2D(i,caller, &u[l]);
+							caller->updateSubgridCUDA2D(i,caller, &u[l]);
+							__syncthreads();
+						}
+						if(   (bound == 4))
+						{
+							caller->runSubgridCUDA2D(4,u,i);
+							//__syncthreads();
+							//caller->insertSubgridCUDA2D(u[l],i);
+							//__syncthreads();
+							//caller->getSubgridCUDA2D(i,caller, &u[l]);
+							caller->updateSubgridCUDA2D(i,caller, &u[l]);
+							__syncthreads();
+						}
+
+
+
+
+
+
+
+
+
+
+			if( ((bound & 3) ))
+			{
+				caller->runSubgridCUDA2D(3,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+			if( ((bound & 5) ))
+			{
+				caller->runSubgridCUDA2D(5,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+			if( ((bound & 10) ))
+			{
+				caller->runSubgridCUDA2D(10,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+			if(   (bound & 12) )
+			{
+				caller->runSubgridCUDA2D(12,u,i);
+				//__syncthreads();
+				//caller->insertSubgridCUDA2D(u[l],i);
+				//__syncthreads();
+				//caller->getSubgridCUDA2D(i,caller, &u[l]);
+				caller->updateSubgridCUDA2D(i,caller, &u[l]);
+				__syncthreads();
+			}
+
+
+
+
+
+
+
+
+
+
+
+
+		}
+		/*if( bound )
+		{
+			caller->runSubgridCUDA2D(15,u,i);
+			__syncthreads();
+			//caller->insertSubgridCUDA2D(u[l],i);
+			//__syncthreads();
+			//caller->getSubgridCUDA2D(i,caller, &u[l]);
+			caller->updateSubgridCUDA2D(i,caller, &u[l]);
+			__syncthreads();
+		}*/
+
+		if(l==0)
+		{
+			caller->setBoundaryConditionCUDA2D(i, 0);
+			caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 );
+		}
+
+
+	}
+
+
+
+}
+
+#endif /*HAVE_CUDA*/
+
+#endif /* TNLPARALLELMAPSOLVER2D_IMPL_H_ */
diff --git a/examples/hamilton-jacobi-parallel/CMakeLists.txt b/examples/hamilton-jacobi-parallel/CMakeLists.txt
index 3911e6108b490c4d6fe52f174b3151e986a18e73..b65f3e6a4c8d0cc22a5e10dc46f298fcc5e0dfc0 100755
--- a/examples/hamilton-jacobi-parallel/CMakeLists.txt
+++ b/examples/hamilton-jacobi-parallel/CMakeLists.txt
@@ -1,6 +1,7 @@
 set( tnl_hamilton_jacobi_parallel_SOURCES
 #     MainBuildConfig.h
-#     tnlParallelEikonalSolver_impl.h
+#     tnlParallelEikonalSolver2D_impl.h
+#     tnlParallelEikonalSolver3D_impl.h
 #     tnlParallelEikonalSolver.h
 #     parallelEikonalConfig.h 
      main.cpp)
diff --git a/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h b/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h
index b79341e909422b0b2ac67abb7ad0080a52ad2441..bcc4c1fe8040b9a5b5027149af4a74de6746b150 100644
--- a/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h
+++ b/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          hamiltonJacobiProblemConfig.h  -  description
+                          parallelEikonalConfig.h  -  description
                              -------------------
     begin                : Oct 5, 2014
     copyright            : (C) 2014 by Tomas Sobotik
@@ -15,8 +15,8 @@
  *                                                                         *
  ***************************************************************************/
 
-#ifndef HAMILTONJACOBIPROBLEMCONFIG_H_
-#define HAMILTONJACOBIPROBLEMCONFIG_H_
+#ifndef HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_
+#define HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_
 
 #include <config/tnlConfigDescription.h>
 
@@ -43,4 +43,4 @@ class parallelEikonalConfig
       }
 };
 
-#endif /* HAMILTONJACOBIPROBLEMCONFIG_H_ */
+#endif /* HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ */
diff --git a/src/operators/godunov-eikonal/CMakeLists.txt b/src/operators/godunov-eikonal/CMakeLists.txt
index c24fc262f85f7a8c9a9fdfb0add69564cf2ca00a..8b74f755ed0a74e7001f1c155c60f61c650679d2 100755
--- a/src/operators/godunov-eikonal/CMakeLists.txt
+++ b/src/operators/godunov-eikonal/CMakeLists.txt
@@ -1,11 +1,13 @@
 set( headers godunovEikonal.h
 	     parallelGodunovEikonal.h
+	     parallelGodunovMap.h
 	     godunovEikonal1D_impl.h
              godunovEikonal2D_impl.h
              godunovEikonal3D_impl.h
              parallelGodunovEikonal1D_impl.h
              parallelGodunovEikonal2D_impl.h
-             parallelGodunovEikonal3D_impl.h)
+             parallelGodunovEikonal3D_impl.h
+             parallelGodunovMap2D_impl.h)
 
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/operators/godunov-eikonal )
 
diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal.h b/src/operators/godunov-eikonal/parallelGodunovEikonal.h
index 6954c3008c8a43ff85efd6ef806b664b13ba735c..5e0affb345ec52ebc63f17a52969a06269cf31d2 100644
--- a/src/operators/godunov-eikonal/parallelGodunovEikonal.h
+++ b/src/operators/godunov-eikonal/parallelGodunovEikonal.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          godunov.h  -  description
+                          parallelGodunovEikonal.h  -  description
                              -------------------
     begin                : Dec 1 , 2014
     copyright            : (C) 2014 by Tomas Sobotik
@@ -14,8 +14,8 @@
  *                                                                         *
  ***************************************************************************/
 
-#ifndef GODUNOVEIKONAL_H_
-#define GODUNOVEIKONAL_H_
+#ifndef PARALLELGODUNOVEIKONAL_H_
+#define PARALLELGODUNOVEIKONAL_H_
 
 #include <matrices/tnlCSRMatrix.h>
 #include <solvers/preconditioners/tnlDummyPreconditioner.h>
@@ -266,4 +266,4 @@ protected:
 #include <operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h>
 
 
-#endif /* GODUNOVEIKONAL_H_ */
+#endif /* PARALLELGODUNOVEIKONAL_H_ */
diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h
index cb0670518470254a201d9d025b2fa2b271d3f76c..cf12f544d7dafee24cfc992c3902038d03b14c80 100644
--- a/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h
+++ b/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h
@@ -14,8 +14,8 @@
  *                                                                         *
  ***************************************************************************/
 
-#ifndef GODUNOVEIKONAL1D_IMPL_H_
-#define GODUNOVEIKONAL1D_IMPL_H_
+#ifndef PARALLELGODUNOVEIKONAL1D_IMPL_H_
+#define PARALLELGODUNOVEIKONAL1D_IMPL_H_
 
 
 
@@ -167,4 +167,4 @@ Real parallelGodunovEikonalScheme< tnlGrid< 1, MeshReal, Device, MeshIndex >, Re
 }
 
 
-#endif /* GODUNOVEIKONAL1D_IMPL_H_ */
+#endif /* PARALLELGODUNOVEIKONAL1D_IMPL_H_ */
diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h
index 5ac0dd0d9b05ab21ade02f4526c9b65f01337f5c..7a762a09a9ce7c428c2d429f4db10ac7f70d30fb 100644
--- a/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h
+++ b/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h
@@ -14,8 +14,8 @@
  *                                                                         *
  ***************************************************************************/
 
-#ifndef GODUNOVEIKONAL2D_IMPL_H_
-#define GODUNOVEIKONAL2D_IMPL_H_
+#ifndef PARALLELGODUNOVEIKONAL2D_IMPL_H_
+#define PARALLELGODUNOVEIKONAL2D_IMPL_H_
 
 
 template< typename MeshReal,
@@ -481,4 +481,4 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re
 }
 
 
-#endif /* GODUNOVEIKONAL2D_IMPL_H_ */
+#endif /* PARALLELGODUNOVEIKONAL2D_IMPL_H_ */
diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h
index 2ab0f2c42eae9fe7100ea4cd76f6446dcd4362cf..7206cc9a40d69f1c70441238dddd42e106ef5949 100644
--- a/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h
+++ b/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h
@@ -14,8 +14,8 @@
  *                                                                         *
  ***************************************************************************/
 
-#ifndef GODUNOVEIKONAL3D_IMPL_H_
-#define GODUNOVEIKONAL3D_IMPL_H_
+#ifndef PARALLELGODUNOVEIKONAL3D_IMPL_H_
+#define PARALLELGODUNOVEIKONAL3D_IMPL_H_
 
 
 template< typename MeshReal,
@@ -384,4 +384,4 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re
 }
 
 
-#endif /* GODUNOVEIKONAL3D_IMPL_H_ */
+#endif /* PARALLELGODUNOVEIKONAL3D_IMPL_H_ */
diff --git a/src/operators/godunov-eikonal/parallelGodunovMap.h b/src/operators/godunov-eikonal/parallelGodunovMap.h
new file mode 100644
index 0000000000000000000000000000000000000000..19a764d9ba2535985549a08484c84c6d09084201
--- /dev/null
+++ b/src/operators/godunov-eikonal/parallelGodunovMap.h
@@ -0,0 +1,270 @@
+/***************************************************************************
+                          parallelGodunovMap.h  -  description
+                             -------------------
+    begin                : Dec 1 , 2014
+    copyright            : (C) 2014 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef PARALLELGODUNOVMAP_H_
+#define PARALLELGODUNOVMAP_H_
+
+#include <matrices/tnlCSRMatrix.h>
+#include <solvers/preconditioners/tnlDummyPreconditioner.h>
+#include <solvers/tnlSolverMonitor.h>
+#include <core/tnlLogger.h>
+#include <core/vectors/tnlVector.h>
+#include <core/vectors/tnlSharedVector.h>
+#include <core/mfilename.h>
+#include <mesh/tnlGrid.h>
+#include <core/tnlCuda.h>
+#include <mesh/grids/tnlGridEntity.h>
+
+
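+/****
+ * Godunov-type upwind scheme for eikonal-like Hamilton-Jacobi equations on
+ * tnlGrid meshes. Compared to parallelGodunovEikonal, this "map" variant
+ * additionally takes a cell-wise map (see getValueDev below) that rescales
+ * the constant term of the residual. Summary inferred from the interface and
+ * the 2D implementation; only the grid specializations below are used, the
+ * primary template is intentionally empty.
+ */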
+template< typename Mesh,
+		  typename Real,
+		  typename Index >
+class parallelGodunovMapScheme
+{
+};
+
+
+
+
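+/****
+ * 1D specialization: declares the interface only; its implementation include
+ * (parallelGodunovMap1D_impl.h) is commented out at the end of this header.
+ */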
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class parallelGodunovMapScheme< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 1, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	static tnlString getType();
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	RealType positivePart(const RealType arg) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	RealType negativePart(const RealType arg) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	RealType sign(const RealType x, const RealType eps) const;
+
+    template< typename Vector >
+ #ifdef HAVE_CUDA
+    __device__ __host__
+ #endif
+    Real getValue( const MeshType& mesh,
+                   const IndexType cellIndex,
+                   const CoordinatesType& coordinates,
+                   const Vector& u,
+                   const RealType& time,
+                   const IndexType boundaryCondition) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	bool init( const tnlParameterContainer& parameters );
+
+   RealType epsilon;
+
+protected:
+
+	MeshType originalMesh;
+
+	DofVectorType dofVector;
+
+	RealType h;
+
+
+
+
+};
+
+
+
+
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 2, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	static tnlString getType();
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    RealType positivePart(const RealType arg) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    RealType negativePart(const RealType arg) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    RealType sign(const RealType x, const Real eps) const;
+
+    template< typename Vector >
+ #ifdef HAVE_CUDA
+    __device__ __host__
+ #endif
+    Real getValue( const MeshType& mesh,
+                   const IndexType cellIndex,
+                   const CoordinatesType& coordinates,
+                   const Vector& u,
+                   const RealType& time,
+                   const IndexType boundaryCondition,
+                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities) const;
+
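+    /****
+     * Device-only variant of getValue(): it works on raw pointers to the
+     * degrees of freedom and to the cell-wise map instead of Vector objects,
+     * so it can be called directly from CUDA kernels.
+     */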
+ #ifdef HAVE_CUDA
+    __device__
+ #endif
+    Real getValueDev( const MeshType& mesh,
+                   const IndexType cellIndex,
+                   const CoordinatesType& coordinates,
+                   const RealType* u,
+                   const RealType& time,
+                   const IndexType boundaryCondition,
+                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities,
+                   const RealType* map) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	bool init( const tnlParameterContainer& parameters );
+
+   RealType epsilon;
+
+protected:
+
+ 	MeshType originalMesh;
+
+    DofVectorType dofVector;
+
+    RealType hx, ihx;
+    RealType hy, ihy;
+
+
+
+
+};
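+
+/****
+ * Usage sketch (an assumption for illustration, not part of this patch):
+ * a solver would instantiate the 2D specialization, initialize it from the
+ * parameter container and evaluate the scheme cell by cell, e.g.
+ *
+ *    parallelGodunovMapScheme< tnlGrid< 2, double, tnlHost, int >, double, int > scheme;
+ *    if( ! scheme.init( parameters ) )
+ *       return false;
+ *    double rhs = scheme.getValue( mesh, cellIndex, coordinates, u, time,
+ *                                  boundaryCondition, neighbourEntities );
+ */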
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+class parallelGodunovMapScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >
+{
+
+public:
+	typedef Real RealType;
+	typedef Device DeviceType;
+	typedef Index IndexType;
+	typedef tnlGrid< 3, Real, Device, Index > MeshType;
+	typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType;
+	typedef typename MeshType::CoordinatesType CoordinatesType;
+
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+	static tnlString getType();
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    RealType positivePart(const RealType arg) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    RealType negativePart(const RealType arg) const;
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    RealType sign(const RealType x, const Real eps) const;
+
+    template< typename Vector >
+ #ifdef HAVE_CUDA
+    __device__ __host__
+ #endif
+    Real getValue( const MeshType& mesh,
+                   const IndexType cellIndex,
+                   const CoordinatesType& coordinates,
+                   const Vector& u,
+                   const RealType& time,
+                   const IndexType boundaryCondition,
+                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities ) const;
+
+#ifdef HAVE_CUDA
+   __device__
+#endif
+   Real getValueDev( const MeshType& mesh,
+                  const IndexType cellIndex,
+                  const CoordinatesType& coordinates,
+                  const RealType* u,
+                  const RealType& time,
+                  const IndexType boundaryCondition,
+                  const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities) const;
+
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+    bool init( const tnlParameterContainer& parameters );
+
+   RealType epsilon;
+
+protected:
+
+ 	MeshType originalMesh;
+
+    DofVectorType dofVector;
+
+    RealType hx, ihx;
+    RealType hy, ihy;
+    RealType hz, ihz;
+
+};
+
+
+
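+// Only the 2D implementation is included here; the 1D and 3D includes below
+// are left commented out, so those specializations appear to be
+// declaration-only for now.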
+//#include <operators/godunov-eikonal/parallelGodunovMap1D_impl.h>
+#include <operators/godunov-eikonal/parallelGodunovMap2D_impl.h>
+//#include <operators/godunov-eikonal/parallelGodunovMap3D_impl.h>
+
+
+#endif /* PARALLELGODUNOVMAP_H_ */
diff --git a/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h b/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..3be04db3fde451ade9dae1dea33984dff52db93a
--- /dev/null
+++ b/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h
@@ -0,0 +1,507 @@
+/***************************************************************************
+                          parallelGodunovMap2D_impl.h  -  description
+                             -------------------
+    begin                : Dec 1 , 2014
+    copyright            : (C) 2014 by Tomas Sobotik
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef PARALLELGODUNOVMAP2D_IMPL_H_
+#define PARALLELGODUNOVMAP2D_IMPL_H_
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+Real parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const
+{
+	if(arg > 0.0)
+		return arg;
+	return 0.0;
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+Real  parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const
+{
+	if(arg < 0.0)
+		return -arg;
+	return 0.0;
+}
+
+
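+/****
+ * Smoothed sign function: returns +-1 outside the band (-eps, eps), exact
+ * zero for x == 0, and the regularized value sin( (M_PI/2) * x/eps ) inside
+ * the band. The width eps is the epsilon computed in init().
+ */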
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+Real parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: sign(const Real x, const Real eps) const
+{
+	if(x > eps)
+		return 1.0;
+	if (x < -eps)
+		return (-1.0);
+
+	if ( x == 0.0)
+		return 0.0;
+
+	return sin( (M_PI/2.0)*(x/eps) );
+}
+
+
+
+
+
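+/****
+ * init() loads the original (global) mesh from the file given by the "mesh"
+ * parameter, stores the space steps hx, hy together with their inverses, and
+ * scales the smoothing parameter epsilon by the grid diagonal
+ * sqrt( hx*hx + hy*hy ).
+ */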
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+bool parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters )
+{
+
+	   const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" );
+	   //MeshType originalMesh;
+	   if( ! originalMesh.load( meshFile ) )
+	   {
+		   //cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl;
+		   return false;
+	   }
+
+
+
+
+	   hx = originalMesh.template getSpaceStepsProducts< 1, 0 >();
+	   ihx = 1.0/hx;
+	   hy = originalMesh.template getSpaceStepsProducts< 0, 1 >();
+	   ihy = 1.0/hy;
+
+	   this->epsilon = parameters. getParameter< double >( "epsilon" );
+
+	   this->epsilon *=sqrt( hx*hx + hy*hy );
+
+//	   dofVector. setSize( this->mesh.getDofs() );
+
+	   return true;
+
+}
+
+
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+#ifdef HAVE_CUDA
+   __device__ __host__
+#endif
+tnlString parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index > :: getType()
+{
+   return tnlString( "parallelGodunovMapScheme< " ) +
+          MeshType::getType() + ", " +
+          ::getType< Real >() + ", " +
+          ::getType< Index >() + " >";
+}
+
+
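+/****
+ * getValue() evaluates the Godunov upwind residual for one cell: one-sided
+ * differences of u are chosen according to the smoothed sign of u[cellIndex],
+ * the larger upwind slopes a, b are combined into sqrt( a*a + b*b ), and the
+ * returned value is c = 1.0 - sqrt( a*a + b*b ) * ihx multiplied by the
+ * (smoothed or exact) sign of u[cellIndex]. Note that only ihx is used for
+ * the scaling, i.e. a uniform grid with hx == hy is assumed here.
+ */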
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+template< typename Vector >
+#ifdef HAVE_CUDA
+__device__ __host__
+#endif
+Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: getValue( const MeshType& mesh,
+                   const IndexType cellIndex,
+                   const CoordinatesType& coordinates,
+                   const Vector& u,
+                   const Real& time,
+                   const IndexType boundaryCondition,
+                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities ) const
+{
+
+
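+	// boundaryCondition is a bit mask flagging which edges of the (sub)grid are
+	// treated as fixed boundaries; on a flagged edge the scheme returns 0
+	// (bit 4: x == 0, bit 2: x == dimensions.x()-1, bit 8: y == 0, bit 1: y == dimensions.y()-1).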
+	if ( ((coordinates.x() == 0 && (boundaryCondition & 4)) or
+		 (coordinates.x() == mesh.getDimensions().x() - 1 && (boundaryCondition & 2)) or
+		 (coordinates.y() == 0 && (boundaryCondition & 8)) or
+		 (coordinates.y() == mesh.getDimensions().y() - 1  && (boundaryCondition & 1)))
+		 /*and
+		 !(		 (coordinates.y() == 0 or coordinates.y() == mesh.getDimensions().y() - 1)
+				 and
+				 ( coordinates.x() == 0 or coordinates.x() == mesh.getDimensions().x() - 1)
+		  )*/
+		)
+	{
+		return 0.0;
+	}
+
+
+	//-----------------------------------
+
+	RealType signui;
+	signui = sign(u[cellIndex],this->epsilon);
+
+
+#ifdef HAVE_CUDA
+	//printf("%d   :    %d ;;;; %d   :   %d  , %f \n",threadIdx.x, mesh.getDimensions().x() , threadIdx.y,mesh.getDimensions().y(), epsilon );
+#endif
+
+	RealType xb = u[cellIndex];
+	RealType xf = -u[cellIndex];
+	RealType yb = u[cellIndex];
+	RealType yf = -u[cellIndex];
+	RealType a,b,c;
+
+
+	   if(coordinates.x() == mesh.getDimensions().x() - 1)
+		   xf += u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+	   else
+		   xf += u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+
+	   if(coordinates.x() == 0)
+		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+	   else
+		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+
+	   if(coordinates.y() == mesh.getDimensions().y() - 1)
+		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+	   else
+		   yf += u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+
+	   if(coordinates.y() == 0)
+		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+	   else
+		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+
+
+	   //xb *= ihx;
+	   //xf *= ihx;
+	  // yb *= ihy;
+	   //yf *= ihy;
+
+	   if(signui > 0.0)
+	   {
+		   xf = negativePart(xf);
+
+		   xb = positivePart(xb);
+
+		   yf = negativePart(yf);
+
+		   yb = positivePart(yb);
+
+	   }
+	   else if(signui < 0.0)
+	   {
+
+		   xb = negativePart(xb);
+
+		   xf = positivePart(xf);
+
+		   yb = negativePart(yb);
+
+		   yf = positivePart(yf);
+	   }
+
+
+	   if(xb - xf > 0.0)
+		   a = xb;
+	   else
+		   a = xf;
+
+	   if(yb - yf > 0.0)
+		   b = yb;
+	   else
+		   b = yf;
+
+	   c =(1.0 - sqrt(a*a+b*b)*ihx );
+
+	   if(Sign(c) > 0.0 )
+		   return Sign(u[cellIndex])*c;
+	   else
+		   return signui*c;
+
+	   //--------------------------------------------------
+
+//
+//
+//
+//	RealType acc = hx*hy*hx*hy;
+//
+//	RealType nabla, xb, xf, yb, yf, signui;
+//
+//	signui = sign(u[cellIndex],this->epsilon);
+//
+//
+//	//if(fabs(u[cellIndex]) < acc) return 0.0;
+//
+//	   if(signui > 0.0)
+//	   {
+//	/**/ /*  if(boundaryCondition & 2)
+//			   xf = (u[mesh.getCellXSuccessor( cellIndex )] - u[cellIndex])/hx;
+//		   else *//*if(boundaryCondition & 4)
+//			   xf = 0.0;
+//		   else /**/if(coordinates.x() == mesh.getDimensions().x() - 1)
+//			   xf = negativePart((u[neighbourEntities.template getEntityIndex< -1,  0 >()] - u[cellIndex])*ihx);
+//		   else
+//			   xf = negativePart((u[neighbourEntities.template getEntityIndex< 1,  0 >()] - u[cellIndex])*ihx);
+//
+//	/**/ /*  if(boundaryCondition & 4)
+//			   xb = (u[cellIndex] - u[mesh.getCellXPredecessor( cellIndex )])*ihx;
+//		   else *//*if(boundaryCondition & 2)
+//			   xb = 0.0;
+//		   else /**/if(coordinates.x() == 0)
+//			   xb = positivePart((u[cellIndex] - u[mesh.template getCellNextToCell<+1,0>( cellIndex )])*ihx);
+//		   else
+//			   xb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< -1,  0 >()])*ihx);
+//
+//	/**/  /* if(boundaryCondition & 1)
+//			   yf = (u[mesh.getCellYSuccessor( cellIndex )] - u[cellIndex])*ihy;
+//		   else *//*if(boundaryCondition & 8)
+//			   yf = 0.0;
+//		   else /**/if(coordinates.y() == mesh.getDimensions().y() - 1)
+//			   yf = negativePart((u[neighbourEntities.template getEntityIndex< 0,  -1 >()] - u[cellIndex])*ihy);
+//		   else
+//			   yf = negativePart((u[neighbourEntities.template getEntityIndex< 0,  1 >()] - u[cellIndex])*ihy);
+//
+//	/**/  /* if(boundaryCondition & 8)
+//			   yb = (u[cellIndex] - u[mesh.getCellYPredecessor( cellIndex )])*ihy;
+//		   else *//*if(boundaryCondition & 1)
+//			   yb = 0.0;
+//		   else /**/if(coordinates.y() == 0)
+//			   yb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  1 >()])*ihy);
+//		   else
+//			   yb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  -1 >()])*ihy);
+//
+//		   if(xb - xf > 0.0)
+//			   xf = 0.0;
+//		   else
+//			   xb = 0.0;
+//
+//		   if(yb - yf > 0.0)
+//			   yf = 0.0;
+//		   else
+//			   yb = 0.0;
+//
+//		   nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb );
+//		   if(fabs(1.0-nabla) < acc)
+//			   return 0.0;
+//		   return signui*(1.0 - nabla);
+//	   }
+//	   else if (signui < 0.0)
+//	   {
+//
+//	/**/  /* if(boundaryCondition & 2)
+//			   xf = (u[mesh.getCellXSuccessor( cellIndex )] - u[cellIndex])*ihx;
+//		   else*//* if(boundaryCondition & 4)
+//			   xf = 0.0;
+//		   else /**/if(coordinates.x() == mesh.getDimensions().x() - 1)
+//			   xf = positivePart((u[neighbourEntities.template getEntityIndex< -1,  0 >()] - u[cellIndex])*ihx);
+//		   else
+//			   xf = positivePart((u[neighbourEntities.template getEntityIndex< 1,  0 >()] - u[cellIndex])*ihx);
+//
+//	/**/  /* if(boundaryCondition & 4)
+//			   xb = (u[cellIndex] - u[mesh.getCellXPredecessor( cellIndex )])*ihx;
+//		   else*//* if(boundaryCondition & 2)
+//			   xb = 0.0;
+//		   else /**/if(coordinates.x() == 0)
+//			   xb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 1,  0 >()])*ihx);
+//		   else
+//			   xb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< -1,  0 >()])*ihx);
+//
+//	/**/ /*  if(boundaryCondition & 1)
+//			   yf = (u[mesh.getCellYSuccessor( cellIndex )] - u[cellIndex])*ihy;
+//		   else *//*if(boundaryCondition & 8)
+//			   yf = 0.0;
+//		   else /**/if(coordinates.y() == mesh.getDimensions().y() - 1)
+//			   yf = positivePart((u[neighbourEntities.template getEntityIndex< 0,  -1 >()] - u[cellIndex])*ihy);
+//		   else
+//			   yf = positivePart((u[neighbourEntities.template getEntityIndex< 0,  1 >()] - u[cellIndex])*ihy);
+//
+//	/**/  /* if(boundaryCondition & 8)
+//			   yb = (u[cellIndex] - u[mesh.getCellYPredecessor( cellIndex )])*ihy;
+//		   else*//* if(boundaryCondition & 1)
+//			   yb = 0.0;
+//		   else /**/if(coordinates.y() == 0)
+//			   yb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  1 >()])*ihy);
+//		   else
+//			   yb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0,  -1 >()])*ihy);
+//
+//
+//		   if(xb - xf > 0.0)
+//			   xf = 0.0;
+//		   else
+//			   xb = 0.0;
+//
+//		   if(yb - yf > 0.0)
+//			   yf = 0.0;
+//		   else
+//			   yb = 0.0;
+//
+//		   nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb );
+//
+//		   if(fabs(1.0-nabla) < acc)
+//			   return 0.0;
+//		   return signui*(1.0 - nabla);
+//	   }
+//	   else
+//	   {
+//		   return 0.0;
+//	   }
+
+}
+
+
+
+
+
+
+
+
+
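+/****
+ * getValueDev() mirrors getValue() for use inside CUDA kernels: it takes raw
+ * device pointers to u and to the cell-wise map, and replaces the constant
+ * 1.0 of the residual by value = 50.0 / map[cellIndex]. The zero-map guard is
+ * commented out below, so the caller is expected to pass a map with nonzero
+ * entries.
+ */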
+template< typename MeshReal,
+          typename Device,
+          typename MeshIndex,
+          typename Real,
+          typename Index >
+
+#ifdef HAVE_CUDA
+__device__
+#endif
+Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: getValueDev( const MeshType& mesh,
+                   const IndexType cellIndex,
+                   const CoordinatesType& coordinates,
+                   const Real* u,
+                   const Real& time,
+                   const IndexType boundaryCondition,
+                   const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities,
+                   const Real* map) const
+{
+//	int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x;
+
+	RealType signui;
+//	if(boundaryCondition == 0)
+		signui = sign(u[cellIndex],/*(boundaryCondition != 0) * */this->epsilon);
+//	else
+//		signui = Sign(u[cellIndex]);
+
+	RealType value;
+//	if(map[cellIndex] == 0.0)
+//	{
+////		value = INT_MAX;
+//		u[cellIndex] = Sign(u[cellIndex])*INT_MAX;
+//		return 0.0;
+//	}
+//	else
+		value = 50.0/ map[cellIndex];
+
+
+	RealType xb = u[cellIndex];
+	RealType xf = -u[cellIndex];
+	RealType yb = u[cellIndex];
+	RealType yf = -u[cellIndex];
+	RealType a,b,c;
+
+
+	   if(coordinates.x() == mesh.getDimensions().x() - 1)
+		   xf += u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+	   else
+		   xf += u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+
+	   if(coordinates.x() == 0)
+		   xb -= u[neighbourEntities.template getEntityIndex< 1,  0 >()];
+	   else
+		   xb -= u[neighbourEntities.template getEntityIndex< -1,  0 >()];
+
+	   if(coordinates.y() == mesh.getDimensions().y() - 1)
+		   yf += u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+	   else
+		   yf += u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+
+	   if(coordinates.y() == 0)
+		   yb -= u[neighbourEntities.template getEntityIndex< 0,  1 >()];
+	   else
+		   yb -= u[neighbourEntities.template getEntityIndex< 0,  -1 >()];
+
+
+	   if(signui > 0.0)
+	   {
+		   xf = negativePart(xf);
+
+		   xb = positivePart(xb);
+
+		   yf = negativePart(yf);
+
+		   yb = positivePart(yb);
+
+	   }
+	   else if(signui < 0.0)
+	   {
+
+		   xb = negativePart(xb);
+
+		   xf = positivePart(xf);
+
+		   yb = negativePart(yb);
+
+		   yf = positivePart(yf);
+	   }
+
+	   /*if(boundaryCondition &1)
+		   yb = 0.0;
+	   else
+		   yf = 0.0;
+
+	   if(boundaryCondition & 2)
+		   xb = 0.0;
+	   else
+		   xf = 0.0;*/
+
+	   if(xb - xf > 0.0)
+		   a = xb;
+	   else
+		   a = xf;
+
+	   if(yb - yf > 0.0)
+		   b = yb;
+	   else
+		   b = yf;
+
+	   c =(value - sqrt(a*a+b*b)*ihx );
+
+	   if(c > 0.0 )
+		   return Sign(u[cellIndex])*c;
+	   else
+		   return signui*c;
+}
+
+
+#endif /* PARALLELGODUNOVMAP2D_IMPL_H_ */