Loading examples/fast-sweeping/main.h +2 −2 Original line number Diff line number Diff line Loading @@ -17,9 +17,9 @@ #include "MainBuildConfig.h" //for HOST versions: #include "tnlFastSweeping.h" //#include "tnlFastSweeping.h" //for DEVICE versions: //#include "tnlFastSweeping_CUDA.h" #include "tnlFastSweeping_CUDA.h" #include "fastSweepingConfig.h" #include <solvers/tnlConfigTags.h> Loading examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +16 −13 Original line number Diff line number Diff line Loading @@ -1023,7 +1023,8 @@ __device__ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) { //int j = threadIdx.x + threadIdx.y * blockDim.x; int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols int th = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows (blockIdx.y) * caller->n*caller->n*caller->gridCols + (blockIdx.x) * caller->n + threadIdx.y * caller->n*caller->gridCols + threadIdx.x; Loading @@ -1038,8 +1039,9 @@ template< typename SchemeHost, typename SchemeDevice, typename Device> __device__ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) { int j = threadIdx.x + threadIdx.y * blockDim.x; int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols // int j = threadIdx.x + threadIdx.y * blockDim.x; int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows (blockIdx.y) * caller->n*caller->n*caller->gridCols + (blockIdx.x) * caller->n + threadIdx.y * caller->n*caller->gridCols + threadIdx.x; Loading @@ -1064,9 +1066,10 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: // int j = threadIdx.x + threadIdx.y * blockDim.x; //printf("j = %d, u = %f\n", j,u); int index = (blockIdx.y)*this->n*this->n*this->gridCols + (blockIdx.x)*this->n + threadIdx.y*this->n*this->gridCols int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows (blockIdx.y) * caller->n*caller->n*caller->gridCols + (blockIdx.x) * caller->n + threadIdx.y * caller->n*caller->gridCols + threadIdx.x; //printf("i= %d,j= %d,index= %d\n",i,j,index); Loading Loading
examples/fast-sweeping/main.h +2 −2 Original line number Diff line number Diff line Loading @@ -17,9 +17,9 @@ #include "MainBuildConfig.h" //for HOST versions: #include "tnlFastSweeping.h" //#include "tnlFastSweeping.h" //for DEVICE versions: //#include "tnlFastSweeping_CUDA.h" #include "tnlFastSweeping_CUDA.h" #include "fastSweepingConfig.h" #include <solvers/tnlConfigTags.h> Loading
examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +16 −13 Original line number Diff line number Diff line Loading @@ -1023,7 +1023,8 @@ __device__ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) { //int j = threadIdx.x + threadIdx.y * blockDim.x; int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols int th = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows (blockIdx.y) * caller->n*caller->n*caller->gridCols + (blockIdx.x) * caller->n + threadIdx.y * caller->n*caller->gridCols + threadIdx.x; Loading @@ -1038,8 +1039,9 @@ template< typename SchemeHost, typename SchemeDevice, typename Device> __device__ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) { int j = threadIdx.x + threadIdx.y * blockDim.x; int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols // int j = threadIdx.x + threadIdx.y * blockDim.x; int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows (blockIdx.y) * caller->n*caller->n*caller->gridCols + (blockIdx.x) * caller->n + threadIdx.y * caller->n*caller->gridCols + threadIdx.x; Loading @@ -1064,9 +1066,10 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: // int j = threadIdx.x + threadIdx.y * blockDim.x; //printf("j = %d, u = %f\n", j,u); int index = (blockIdx.y)*this->n*this->n*this->gridCols + (blockIdx.x)*this->n + threadIdx.y*this->n*this->gridCols int index = (blockIdx.z*caller->n + threadIdx.z) * caller->n*caller->n*caller->gridCols*caller->gridRows (blockIdx.y) * caller->n*caller->n*caller->gridCols + (blockIdx.x) * caller->n + threadIdx.y * caller->n*caller->gridCols + threadIdx.x; //printf("i= %d,j= %d,index= %d\n",i,j,index); Loading