From 3ef0d2200d84980656a20aa52870f459fe24d5cd Mon Sep 17 00:00:00 2001 From: Tomas Sobotik <sobotto4@fjfi.cvut.cz> Date: Sun, 20 Mar 2016 18:48:19 +0100 Subject: [PATCH] Merging parallel solver 4 --- .../tnlParallelEikonalSolver3D_impl.h | 36 ++++++++++--------- src/core/tnlCuda.cpp | 27 +++++++++++++- src/core/tnlCuda.cu | 24 +++++++++++++ src/core/tnlCuda.h | 9 +++++ 4 files changed, 79 insertions(+), 17 deletions(-) diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h index 9ffeb8e066..e2171e72dd 100644 --- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h @@ -635,8 +635,8 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: for( int j = 0; j<this->n*this->gridLevels; j++) { int l = j/this->n; - - this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+(j-l)*levelSize]; + if(j - idealStretch < 0) + this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+(j-l)*levelSize]; } } @@ -1178,25 +1178,25 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: { Ent.setCoordinates(tnlStaticVector<3,int>(i,0,k)); Ent.refresh(); - u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.y) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.y+this->n); + u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(threadIdx.y) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.y+this->n); } else if(boundaryCondition == 1) { Ent.setCoordinates(tnlStaticVector<3,int>(i,blockDim.y - 1,k)); Ent.refresh(); - u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(this->n - 1 - threadIdx.y) ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); + u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); } else if(boundaryCondition == 32) { Ent.setCoordinates(tnlStaticVector<3,int>(i,j,0)); Ent.refresh(); - u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.z); + u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(threadIdx.z); } else if(boundaryCondition == 16) { Ent.setCoordinates(tnlStaticVector<3,int>(i,j,blockDim.z - 1)); Ent.refresh(); - u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(this->n - 1 - threadIdx.z) ; + u[l] = u[Ent.getIndex()] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(this->n - 1 - threadIdx.z) ; } } } @@ -1225,7 +1225,7 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: if(computeFU) fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<3,int>(i,j,k)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighbourEntities); - sharedTau[l]=cfl/fabs(fu); + sharedTau[l]=cfl/abs(fu); if(l == 0) { @@ -1235,10 +1235,11 @@ void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>:: if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - + if((blockDim.x == 8) && (l < 256)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]); + __syncthreads(); if((blockDim.x == 8) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); __syncthreads(); - if((blockDim.x == 8) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); + if(l < 64) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); __syncthreads(); if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); @@ -1308,12 +1309,14 @@ __global__ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now { - __shared__ int boundary[4]; // north,east,west,south + __shared__ int boundary[6]; // north,east,west,south __shared__ int subgridValue; __shared__ int newSubgridValue; - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x + (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; + int gid = blockDim.x*blockIdx.x + threadIdx.x + + (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + + (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; double u = cudaSolver->work_u_cuda[gid]; double u_cmp; int subgridValue_cmp=INT_MAX; @@ -1331,7 +1334,7 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int> boundary[4] = 0; boundary[5] = 0; newSubgridValue = 0; - printf("aaa z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); +// printf("aaa z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); } __syncthreads(); @@ -1388,19 +1391,20 @@ void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int> atomicMax(&newSubgridValue, INT_MAX); atomicMax(&boundary[boundary_index], 1); cudaSolver->work_u_cuda[gid] = u_cmp; + u=u_cmp; } __threadfence(); if(threadIdx.z == 0 && (blockIdx.z != 0)/* && (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x*blockDim.y*gridDim.y]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y)*gridDim.x + blockIdx.x + (blockIdx.z-1)*gridDim.x*gridDim.y); + subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z-1)*gridDim.x*gridDim.y); boundary_index = 5; } if(threadIdx.z == blockDim.z - 1 && (blockIdx.z != gridDim.z - 1)/* && (cudaSolver->currentStep & 1)*/) { u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x*blockDim.y*gridDim.y]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y)*gridDim.x + blockIdx.x + (blockIdx.z+1)*gridDim.x*gridDim.y); + subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z+1)*gridDim.x*gridDim.y); boundary_index = 4; } if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) @@ -1444,7 +1448,7 @@ void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Dev int stepValue = cudaSolver->currentStep + 4; if( cudaSolver->getSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) { - printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); + cudaSolver->setSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x, stepValue); } @@ -1508,7 +1512,7 @@ void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, __shared__ int containsCurve; if(l == 0) { - printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); +// printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); containsCurve = 0; } diff --git a/src/core/tnlCuda.cpp b/src/core/tnlCuda.cpp index bd69fb2cab..20f6272a30 100644 --- a/src/core/tnlCuda.cpp +++ b/src/core/tnlCuda.cpp @@ -18,7 +18,9 @@ #include <core/tnlCuda.h> #include <core/mfuncs.h> #include <tnlConfig.h> - +#include <config/tnlConfigDescription.h> +#include <config/tnlParameterContainer.h> + tnlString tnlCuda :: getDeviceType() { return tnlString( "tnlCuda" ); @@ -46,3 +48,26 @@ int tnlCuda::getNumberOfGrids( const int blocks, }*/ +void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) +{ +#ifdef HAVE_CUDA + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computationon.", 0 ); +#else + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computationon (not supported on this system).", 0 ); +#endif +} + +bool tnlCuda::setup( const tnlParameterContainer& parameters, + const tnlString& prefix ) +{ +#ifdef HAVE_CUDA + int cudaDevice = parameters.getParameter< int >( "cuda-device" ); + if( cudaSetDevice( cudaDevice ) != cudaSuccess ) + { + std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl; + return false; + } +#endif + return true; +} + diff --git a/src/core/tnlCuda.cu b/src/core/tnlCuda.cu index 9178a3261e..113cba3522 100644 --- a/src/core/tnlCuda.cu +++ b/src/core/tnlCuda.cu @@ -16,6 +16,30 @@ ***************************************************************************/ #include <core/tnlCuda.h> +#include <config/tnlConfigDescription.h> +#include <config/tnlParameterContainer.h> + + +/*void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) +{ +#ifdef HAVE_CUDA + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device.", 0 ); +#else + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device (CUDA is not supported on this system).", 0 ); +#endif +} + +bool tnlCuda::setup( const tnlParameterContainer& parameters, + const tnlString& prefix ) +{ + int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); +#ifdef HAVE_CUDA + cudaSetDevice( cudaDevice ); + checkCudaDevice; +#endif + return true; +} +*/ bool tnlCuda::checkDevice( const char* file_name, int line ) { diff --git a/src/core/tnlCuda.h b/src/core/tnlCuda.h index 6889f3f109..3d5e6eb6eb 100644 --- a/src/core/tnlCuda.h +++ b/src/core/tnlCuda.h @@ -24,6 +24,9 @@ #include <core/tnlString.h> #include <core/tnlAssert.h> +class tnlConfigDescription; +class tnlParameterContainer; + #ifdef HAVE_CUDA #define __cuda_callable__ __device__ __host__ #else @@ -90,6 +93,12 @@ class tnlCuda #else static bool checkDevice( const char* file_name, int line ) { return false;}; #endif + + static void configSetup( tnlConfigDescription& config, const tnlString& prefix = "" ); + + static bool setup( const tnlParameterContainer& parameters, + const tnlString& prefix = "" ); + }; -- GitLab