diff --git a/examples/fast-sweeping/main.h b/examples/fast-sweeping/main.h
index aabaf684ca797a09f6c37c6fffc864dc000cc93e..3ca90ccf0cf1a9347838ea7b4852733aaab43ea0 100644
--- a/examples/fast-sweeping/main.h
+++ b/examples/fast-sweeping/main.h
@@ -17,9 +17,9 @@
 
 #include "MainBuildConfig.h"
 	//for HOST versions:
-#include "tnlFastSweeping.h"
+//#include "tnlFastSweeping.h"
 	//for DEVICE versions:
-//#include "tnlFastSweeping_CUDA.h"
+#include "tnlFastSweeping_CUDA.h"
 #include "fastSweepingConfig.h"
 #include <solvers/tnlConfigTags.h>
 
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
index 91ec8afa4f75c2ac031a037d1e9e8e027108609e..7399c2d36bdce5827098d5a689d58839cfee8b95 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h
@@ -1023,10 +1023,11 @@ __device__
 void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
 {
 	//int j = threadIdx.x + threadIdx.y * blockDim.x;
-	int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols
-            + (blockIdx.x) * caller->n
-            + threadIdx.y * caller->n*caller->gridCols
-            + threadIdx.x;
+	int th = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows
+			 (blockIdx.y) * caller->n*caller->n*caller->gridCols
+             + (blockIdx.x) * caller->n
+             + threadIdx.y * caller->n*caller->gridCols
+             + threadIdx.x;
 	//printf("i= %d,j= %d,th= %d\n",i,j,th);
 	*a = caller->work_u_cuda[th];
 	//printf("Hi %f \n", *a);
@@ -1038,11 +1039,12 @@ template< typename SchemeHost, typename SchemeDevice, typename Device>
 __device__
 void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a)
 {
-	int j = threadIdx.x + threadIdx.y * blockDim.x;
-	int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols
-            + (blockIdx.x) * caller->n
-            + threadIdx.y * caller->n*caller->gridCols
-            + threadIdx.x;
+//	int j = threadIdx.x + threadIdx.y * blockDim.x;
+	int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows
+				(blockIdx.y) * caller->n*caller->n*caller->gridCols
+				+ (blockIdx.x) * caller->n
+				+ threadIdx.y * caller->n*caller->gridCols
+				+ threadIdx.x;
 
 	if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) )
 	{
@@ -1064,10 +1066,11 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::
 //	int j = threadIdx.x + threadIdx.y * blockDim.x;
 	//printf("j = %d, u = %f\n", j,u);
 
-		int index = (blockIdx.y)*this->n*this->n*this->gridCols
-					+ (blockIdx.x)*this->n
-					+ threadIdx.y*this->n*this->gridCols
-					+ threadIdx.x;
+		int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows
+				 (blockIdx.y) * caller->n*caller->n*caller->gridCols
+	             + (blockIdx.x) * caller->n
+	             + threadIdx.y * caller->n*caller->gridCols
+	             + threadIdx.x;
 
 		//printf("i= %d,j= %d,index= %d\n",i,j,index);
 		if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) )