diff --git a/examples/fast-sweeping/main.h b/examples/fast-sweeping/main.h index aabaf684ca797a09f6c37c6fffc864dc000cc93e..3ca90ccf0cf1a9347838ea7b4852733aaab43ea0 100644 --- a/examples/fast-sweeping/main.h +++ b/examples/fast-sweeping/main.h @@ -17,9 +17,9 @@ #include "MainBuildConfig.h" //for HOST versions: -#include "tnlFastSweeping.h" +//#include "tnlFastSweeping.h" //for DEVICE versions: -//#include "tnlFastSweeping_CUDA.h" +#include "tnlFastSweeping_CUDA.h" #include "fastSweepingConfig.h" #include <solvers/tnlConfigTags.h> diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h index 91ec8afa4f75c2ac031a037d1e9e8e027108609e..7399c2d36bdce5827098d5a689d58839cfee8b95 100644 --- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h @@ -1023,10 +1023,11 @@ __device__ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) { //int j = threadIdx.x + threadIdx.y * blockDim.x; - int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; + int th = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows + (blockIdx.y) * caller->n*caller->n*caller->gridCols + + (blockIdx.x) * caller->n + + threadIdx.y * caller->n*caller->gridCols + + threadIdx.x; //printf("i= %d,j= %d,th= %d\n",i,j,th); *a = caller->work_u_cuda[th]; //printf("Hi %f \n", *a); @@ -1038,11 +1039,12 @@ template< typename SchemeHost, typename SchemeDevice, typename Device> __device__ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) { - int j = threadIdx.x + threadIdx.y * blockDim.x; - int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; +// int j = threadIdx.x + threadIdx.y * blockDim.x; + int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows + (blockIdx.y) * caller->n*caller->n*caller->gridCols + + (blockIdx.x) * caller->n + + threadIdx.y * caller->n*caller->gridCols + + threadIdx.x; if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) { @@ -1064,10 +1066,11 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: // int j = threadIdx.x + threadIdx.y * blockDim.x; //printf("j = %d, u = %f\n", j,u); - int index = (blockIdx.y)*this->n*this->n*this->gridCols - + (blockIdx.x)*this->n - + threadIdx.y*this->n*this->gridCols - + threadIdx.x; + int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows + (blockIdx.y) * caller->n*caller->n*caller->gridCols + + (blockIdx.x) * caller->n + + threadIdx.y * caller->n*caller->gridCols + + threadIdx.x; //printf("i= %d,j= %d,index= %d\n",i,j,index); if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) )