Loading examples/fast-sweeping-map/main.h +2 −2 Original line number Diff line number Diff line Loading @@ -17,9 +17,9 @@ #include "MainBuildConfig.h" //for HOST versions: #include "tnlFastSweepingMap.h" //#include "tnlFastSweepingMap.h" //for DEVICE versions: //#include "tnlFastSweepingMap_CUDA.h" #include "tnlFastSweepingMap_CUDA.h" #include "fastSweepingMapConfig.h" #include <solvers/tnlBuildConfigTags.h> Loading examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h +2 −0 Original line number Diff line number Diff line Loading @@ -904,6 +904,8 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS double fu = 0.0; double finalTime = this->stopTime; if(boundaryCondition == 0) finalTime*=2.0; __syncthreads(); tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); Loading examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -25,7 +25,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device> tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver() { cout << "a" << endl; this->device = tnlHostDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU this->device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU #ifdef HAVE_CUDA if(this->device == tnlCudaDevice) Loading examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h +70 −53 Original line number Diff line number Diff line Loading @@ -16,7 +16,7 @@ #ifndef TNLNARROWBAND2D_IMPL_H_ #define TNLNARROWBAND2D_IMPL_H_ #define NARROWBAND_SUBGRID_SIZE 8 #define NARROWBAND_SUBGRID_SIZE 16 #include "tnlNarrowBand.h" Loading Loading @@ -225,7 +225,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); checkCudaDevice; if(reinit != 0 && time != finalTime ) if(reinit != 0 /*&& time != finalTime */) { cout << time << endl; Loading Loading @@ -273,8 +273,8 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u { // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, // 8 - to the east of curve, 16 - to the west of curve. int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); /*if(cudaStatusVector[subgridID] != 0)*/ int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize; if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y()) { tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); Entity.setCoordinates(CoordinatesType(i,j)); Loading Loading @@ -353,32 +353,33 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i int gid = Entity.getIndex(); if(abs(cudaDofVector2[gid]) > 1.5*h) cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector2[gid]); if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) { if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h; } if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; } if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h; } if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; } } // if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) // { // if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h; // } // if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; // } // // if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h; // } // if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; // } // } // Loading Loading @@ -647,7 +648,9 @@ __global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, in // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, // 8 - to the east of curve, 16 - to the west of curve. if(blockIdx.x > 0) { atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16); } if(blockIdx.x < gridDim.x - 1) atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8); Loading Loading @@ -690,26 +693,40 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int tnlNeighbourGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity); double value = solver->cudaDofVector2[Entity.getIndex()]; double xf,xb,yf,yb, grad, fu, a,b; a = b = 0.0; if( i == 0 || (threadIdx.x == 0 && !(status & 9)) ) yb = yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; { xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()]; xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; } else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) ) yb = yf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; { xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()] - value; } else { yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; } if( j == 0 || (threadIdx.y == 0 && !(status & 3)) ) xb = xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; { yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] ; yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; } else if( j == solver->Mesh.getDimensions().y() - 1 || (threadIdx.y == blockDim.y - 1 && !(status & 5)) ) xb = xf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; { yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()] - value; } else { xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; } __syncthreads(); Loading Loading @@ -739,21 +756,21 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int } /* if(xb > xf) a = xb; if(xb > xf) a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); else a = xf; a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); if(yb > yf) b = yb; b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); else b = yf;*/ b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); grad = sqrt(/*0.5 **/ (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); // grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); // grad = sqrt(/*0.5 **/ (a*a + b*b ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); grad = sqrt(/*0.5 **/ (a*a + b*b ) ); fu = -1.0 * grad; Loading @@ -762,17 +779,17 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, // 8 - to the east of curve, 16 - to the west of curve. if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) ) if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) ) atomicMax(solver->reinitialize,1); else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) atomicMax(solver->reinitialize,1); else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) ) else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) ) atomicMax(solver->reinitialize,1); else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) atomicMax(solver->reinitialize,1); } solver->cudaDofVector2[Entity.getIndex()] = value+tau*fu; solver->cudaDofVector2[Entity.getIndex()] += tau*fu; } } } Loading examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h 0 → 100644 +1313 −0 File added.Preview size limit exceeded, changes collapsed. Show changes Loading
examples/fast-sweeping-map/main.h +2 −2 Original line number Diff line number Diff line Loading @@ -17,9 +17,9 @@ #include "MainBuildConfig.h" //for HOST versions: #include "tnlFastSweepingMap.h" //#include "tnlFastSweepingMap.h" //for DEVICE versions: //#include "tnlFastSweepingMap_CUDA.h" #include "tnlFastSweepingMap_CUDA.h" #include "fastSweepingMapConfig.h" #include <solvers/tnlBuildConfigTags.h> Loading
examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h +2 −0 Original line number Diff line number Diff line Loading @@ -904,6 +904,8 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS double fu = 0.0; double finalTime = this->stopTime; if(boundaryCondition == 0) finalTime*=2.0; __syncthreads(); tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); Loading
examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -25,7 +25,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device> tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver() { cout << "a" << endl; this->device = tnlHostDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU this->device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU #ifdef HAVE_CUDA if(this->device == tnlCudaDevice) Loading
examples/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h +70 −53 Original line number Diff line number Diff line Loading @@ -16,7 +16,7 @@ #ifndef TNLNARROWBAND2D_IMPL_H_ #define TNLNARROWBAND2D_IMPL_H_ #define NARROWBAND_SUBGRID_SIZE 8 #define NARROWBAND_SUBGRID_SIZE 16 #include "tnlNarrowBand.h" Loading Loading @@ -225,7 +225,7 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: r cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); checkCudaDevice; if(reinit != 0 && time != finalTime ) if(reinit != 0 /*&& time != finalTime */) { cout << time << endl; Loading Loading @@ -273,8 +273,8 @@ void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: u { // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, // 8 - to the east of curve, 16 - to the west of curve. int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); /*if(cudaStatusVector[subgridID] != 0)*/ int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize; if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y()) { tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); Entity.setCoordinates(CoordinatesType(i,j)); Loading Loading @@ -353,32 +353,33 @@ bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: i int gid = Entity.getIndex(); if(abs(cudaDofVector2[gid]) > 1.5*h) cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector2[gid]); if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) { if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h; } if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; } if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h; } if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) { cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; } } // if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) // { // if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid+1] = Sign(cudaDofVector2[gid+1])*0.5*h; // } // if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid+Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; // } // // if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid-1] = Sign(cudaDofVector2[gid-1])*0.5*h; // } // if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) // { // cudaDofVector2[gid] = Sign(cudaDofVector2[gid])*0.5*h; // cudaDofVector2[gid-Mesh.getDimensions().x()] = Sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; // } // } // Loading Loading @@ -647,7 +648,9 @@ __global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, in // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, // 8 - to the east of curve, 16 - to the west of curve. if(blockIdx.x > 0) { atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16); } if(blockIdx.x < gridDim.x - 1) atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8); Loading Loading @@ -690,26 +693,40 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int tnlNeighbourGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, tnlHost, int >, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity); double value = solver->cudaDofVector2[Entity.getIndex()]; double xf,xb,yf,yb, grad, fu, a,b; a = b = 0.0; if( i == 0 || (threadIdx.x == 0 && !(status & 9)) ) yb = yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; { xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()]; xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; } else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) ) yb = yf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; { xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()] - value; } else { yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] - value; } if( j == 0 || (threadIdx.y == 0 && !(status & 3)) ) xb = xf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; { yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] ; yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; } else if( j == solver->Mesh.getDimensions().y() - 1 || (threadIdx.y == blockDim.y - 1 && !(status & 5)) ) xb = xf = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; { yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; yf = solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()] - value; } else { xb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; xf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; yb = value - solver->cudaDofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; yf = solver-> cudaDofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] - value; } __syncthreads(); Loading Loading @@ -739,21 +756,21 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int } /* if(xb > xf) a = xb; if(xb > xf) a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); else a = xf; a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); if(yb > yf) b = yb; b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); else b = yf;*/ b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); grad = sqrt(/*0.5 **/ (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); // grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); // grad = sqrt(/*0.5 **/ (a*a + b*b ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); grad = sqrt(/*0.5 **/ (a*a + b*b ) ); fu = -1.0 * grad; Loading @@ -762,17 +779,17 @@ __global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, tnlHost, int // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, // 8 - to the east of curve, 16 - to the west of curve. if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) ) if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) ) atomicMax(solver->reinitialize,1); else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) atomicMax(solver->reinitialize,1); else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) ) else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) ) atomicMax(solver->reinitialize,1); else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) atomicMax(solver->reinitialize,1); } solver->cudaDofVector2[Entity.getIndex()] = value+tau*fu; solver->cudaDofVector2[Entity.getIndex()] += tau*fu; } } } Loading
examples/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h 0 → 100644 +1313 −0 File added.Preview size limit exceeded, changes collapsed. Show changes