Loading examples/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ double atomicFabsMin(double* address, double val) unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); } while (assumed != old); return __longlong_as_double(old); } Loading examples/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h +12 −310 Original line number Diff line number Diff line Loading @@ -81,7 +81,7 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: return false; } h = Mesh.getHx(); this->h = Mesh.getHx(); counter = 0; const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" ); Loading Loading @@ -110,6 +110,8 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: dim3 threadsPerBlock(8, 8,8); dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1); cudaDeviceSynchronize(); checkCudaDevice; initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); cudaDeviceSynchronize(); checkCudaDevice; Loading @@ -128,51 +130,9 @@ template< typename MeshReal, typename Index > bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() { // // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // // dofVector.save("u-00001.tnl"); int n = Mesh.getDimensions().x(); dim3 threadsPerBlock(1, 512); dim3 threadsPerBlock(1, 1024); dim3 numBlocks(8,1); Loading Loading @@ -229,7 +189,6 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: cudaDofVector2[Mesh.template getCellNextToCell<0,1,0>(index)] ); } if( k == 0 ) c = cudaDofVector2[Mesh.template getCellNextToCell<0,0,1>(index)]; else if( k == Mesh.getDimensions().z() - 1 ) Loading @@ -240,17 +199,13 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: cudaDofVector2[Mesh.template getCellNextToCell<0,0,1>(index)] ); } Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c); Real hD= 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); if(hD >= 3.0*h*h) if(hD < 0.0) tmp = fabsMin(a,fabsMin(b,c)) + Sign(value)*h; else tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(3.0*h*h-hD) ); tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(hD) ); // cudaDofVector2[index] = fabsMin(value, tmp); atomicFabsMin(&cudaDofVector2[index],tmp); } Loading @@ -262,257 +217,16 @@ template< typename MeshReal, typename Real, typename Index > __device__ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; int k = blockDim.z*blockIdx.z + threadIdx.z; int gid = Mesh.getCellIndex(CoordinatesType(i,j,k)); cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]); if(abs(cudaDofVector[gid]) < 1.8*this->h) if(abs(cudaDofVector[gid]) < 1.8*h) cudaDofVector2[gid] = cudaDofVector[gid]; // if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1111(i,j); // else // setupSquare1110(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1101(i,j); // else // setupSquare1100(i,j); // } // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1011(i,j); // else // setupSquare1010(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1001(i,j); // else // setupSquare1000(i,j); // } // } // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0111(i,j); // else // setupSquare0110(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0101(i,j); // else // setupSquare0100(i,j); // } // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0011(i,j); // else // setupSquare0010(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0001(i,j); // else // setupSquare0000(i,j); // } // } // } // // } else cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]); return true; // // int total = blockDim.x*gridDim.x; // // // // Real tmp = 0.0; // int flag = 0; // counter = 0; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // // // if(!exactInput) // { // cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*Sign(cudaDofVector[gid]); // } // __threadfence(); //// printf("-----------------------------------------------------------------------------------\n"); // // __threadfence(); // // if(gx > 0 && gx < Mesh.getDimensions().x()-1) // { // if(gy > 0 && gy < Mesh.getDimensions().y()-1) // { // // Index j = gy; // Index i = gx; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // } // //// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); //// printf("****************************************************************\n"); //// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); // if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) // { //// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); // Index j = 0; // Index i = gx; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // //// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); // if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) // { // Index i = gx; // Index j = Mesh.getDimensions().y() - 1; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // //// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); // if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) // { // Index j = gy; // Index i = 0; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } //// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); // if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) // { // Index j = gy; // Index i = Mesh.getDimensions().x() - 1; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // //// printf("##################################################################################################\n"); // if(gx == Mesh.getDimensions().x() - 1 && // gy == Mesh.getDimensions().y() - 1) // { // //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } // if(gx == Mesh.getDimensions().x() - 1 && // gy == 0) // { // //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } //// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); // if(gx == 0 && // gy == Mesh.getDimensions().y() - 1) // { // //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } // if(gx == 0 && // gy == 0) // { //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } // // __threadfence(); // // if(flag==1) // cudaDofVector[gid] = tmp*3; } Loading @@ -525,10 +239,6 @@ __device__ Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) { Real fx = abs(x); //Real fy = abs(y); //Real tmpMin = Min(fx,abs(y)); if(Min(fx,abs(y)) == fx) return x; else Loading @@ -545,16 +255,8 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, doub int gx = 0; int gy = threadIdx.y; //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) // return; int n = solver->Mesh.getDimensions().x(); int blockCount = n/blockDim.y +1; //int gid = solver->Mesh.getDimensions().x() * gy + gx; //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); //int id1 = gx+gy; //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; if(blockIdx.x==0) { Loading Loading @@ -774,7 +476,7 @@ __global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, dou if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz) { solver->initGrid(); solver->initGrid(gx,gy,gz); } Loading examples/fast-sweeping/tnlFastSweeping3D_impl.h +7 −224 Original line number Diff line number Diff line Loading @@ -66,7 +66,7 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: exactInput=false; else exactInput=true; cout << "bla "<<endl; // cout << "bla "<<endl; return initGrid(); } Loading @@ -78,231 +78,14 @@ template< typename MeshReal, typename Index > bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() { for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++) { dofVector2[i]=INT_MAX*Sign(dofVector[i]); if (abs(dofVector[i]) < 1.8*h) dofVector2[i]=dofVector[i]; else dofVector2[i]=INT_MAX*Sign(dofVector[i]); } cout << "bla 2"<<endl; // // for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) // { // for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1111(i,j); // else // setupSquare1110(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1101(i,j); // else // setupSquare1100(i,j); // } // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1011(i,j); // else // setupSquare1010(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1001(i,j); // else // setupSquare1000(i,j); // } // } // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0111(i,j); // else // setupSquare0110(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0101(i,j); // else // setupSquare0100(i,j); // } // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0011(i,j); // else // setupSquare0010(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0001(i,j); // else // setupSquare0000(i,j); // } // } // } // // } // } // Real tmp = 0.0; // Real ax=0.5/sqrt(2.0); // // if(!exactInput) // { // for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) // dofVector[i]=0.5*h*Sign(dofVector[i]); // } // // // for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) // { // for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) // { // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // } // // // // for(int i = 1; i < Mesh.getDimensions().x()-1; i++) // { // Index j = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // for(int i = 1; i < Mesh.getDimensions().x()-1; i++) // { // Index j = Mesh.getDimensions().y() - 1; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // for(int j = 1; j < Mesh.getDimensions().y()-1; j++) // { // Index i = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // for(int j = 1; j < Mesh.getDimensions().y()-1; j++) // { // Index i = Mesh.getDimensions().x() - 1; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // // Index i = Mesh.getDimensions().x() - 1; // Index j = Mesh.getDimensions().y() - 1; // // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // // // // j = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // // // // i = 0; // j = Mesh.getDimensions().y() -1; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // // // // j = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; dofVector2.save("u-00000.tnl"); return true; } Loading Loading @@ -475,12 +258,12 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: dofVector2[Mesh.template getCellNextToCell<0,0,1>(index)] ); } Real hD= 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); if(hD >= 3.0*h) if(hD < 0.0) tmp = fabsMin(a,fabsMin(b,c)) + Sign(value)*h; else tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(3.0*h*h-hD) ); tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(hD) ); dofVector2[index] = fabsMin(value, tmp); Loading examples/fast-sweeping/tnlFastSweeping_CUDA.h +1 −1 Original line number Diff line number Diff line Loading @@ -134,7 +134,7 @@ public: __host__ bool run(); #ifdef HAVE_CUDA __device__ bool initGrid(); __device__ bool initGrid(int i, int j, int k); __device__ void updateValue(const Index i, const Index j, const Index k); __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); __device__ Real fabsMin(const Real x, const Real y); Loading Loading
examples/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ double atomicFabsMin(double* address, double val) unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); } while (assumed != old); return __longlong_as_double(old); } Loading
examples/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h +12 −310 Original line number Diff line number Diff line Loading @@ -81,7 +81,7 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: return false; } h = Mesh.getHx(); this->h = Mesh.getHx(); counter = 0; const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" ); Loading Loading @@ -110,6 +110,8 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: dim3 threadsPerBlock(8, 8,8); dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1); cudaDeviceSynchronize(); checkCudaDevice; initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); cudaDeviceSynchronize(); checkCudaDevice; Loading @@ -128,51 +130,9 @@ template< typename MeshReal, typename Index > bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() { // // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // // dofVector.save("u-00001.tnl"); int n = Mesh.getDimensions().x(); dim3 threadsPerBlock(1, 512); dim3 threadsPerBlock(1, 1024); dim3 numBlocks(8,1); Loading Loading @@ -229,7 +189,6 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: cudaDofVector2[Mesh.template getCellNextToCell<0,1,0>(index)] ); } if( k == 0 ) c = cudaDofVector2[Mesh.template getCellNextToCell<0,0,1>(index)]; else if( k == Mesh.getDimensions().z() - 1 ) Loading @@ -240,17 +199,13 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: cudaDofVector2[Mesh.template getCellNextToCell<0,0,1>(index)] ); } Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c); Real hD= 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); if(hD >= 3.0*h*h) if(hD < 0.0) tmp = fabsMin(a,fabsMin(b,c)) + Sign(value)*h; else tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(3.0*h*h-hD) ); tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(hD) ); // cudaDofVector2[index] = fabsMin(value, tmp); atomicFabsMin(&cudaDofVector2[index],tmp); } Loading @@ -262,257 +217,16 @@ template< typename MeshReal, typename Real, typename Index > __device__ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k) { int i = threadIdx.x + blockDim.x*blockIdx.x; int j = blockDim.y*blockIdx.y + threadIdx.y; int k = blockDim.z*blockIdx.z + threadIdx.z; int gid = Mesh.getCellIndex(CoordinatesType(i,j,k)); cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]); if(abs(cudaDofVector[gid]) < 1.8*this->h) if(abs(cudaDofVector[gid]) < 1.8*h) cudaDofVector2[gid] = cudaDofVector[gid]; // if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1111(i,j); // else // setupSquare1110(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1101(i,j); // else // setupSquare1100(i,j); // } // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1011(i,j); // else // setupSquare1010(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1001(i,j); // else // setupSquare1000(i,j); // } // } // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0111(i,j); // else // setupSquare0110(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0101(i,j); // else // setupSquare0100(i,j); // } // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0011(i,j); // else // setupSquare0010(i,j); // } // else // { // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0001(i,j); // else // setupSquare0000(i,j); // } // } // } // // } else cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]); return true; // // int total = blockDim.x*gridDim.x; // // // // Real tmp = 0.0; // int flag = 0; // counter = 0; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // // // if(!exactInput) // { // cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*Sign(cudaDofVector[gid]); // } // __threadfence(); //// printf("-----------------------------------------------------------------------------------\n"); // // __threadfence(); // // if(gx > 0 && gx < Mesh.getDimensions().x()-1) // { // if(gy > 0 && gy < Mesh.getDimensions().y()-1) // { // // Index j = gy; // Index i = gx; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // } // //// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); //// printf("****************************************************************\n"); //// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); // if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) // { //// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); // Index j = 0; // Index i = gx; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // //// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); // if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) // { // Index i = gx; // Index j = Mesh.getDimensions().y() - 1; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // //// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); // if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) // { // Index j = gy; // Index i = 0; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } //// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); // if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) // { // Index j = gy; // Index i = Mesh.getDimensions().x() - 1; //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // //// printf("##################################################################################################\n"); // if(gx == Mesh.getDimensions().x() - 1 && // gy == Mesh.getDimensions().y() - 1) // { // //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } // if(gx == Mesh.getDimensions().x() - 1 && // gy == 0) // { // //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } //// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); // if(gx == 0 && // gy == Mesh.getDimensions().y() - 1) // { // //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } // if(gx == 0 && // gy == 0) // { //// tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); // if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && // cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) // // flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; // } // // __threadfence(); // // if(flag==1) // cudaDofVector[gid] = tmp*3; } Loading @@ -525,10 +239,6 @@ __device__ Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) { Real fx = abs(x); //Real fy = abs(y); //Real tmpMin = Min(fx,abs(y)); if(Min(fx,abs(y)) == fx) return x; else Loading @@ -545,16 +255,8 @@ __global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, doub int gx = 0; int gy = threadIdx.y; //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) // return; int n = solver->Mesh.getDimensions().x(); int blockCount = n/blockDim.y +1; //int gid = solver->Mesh.getDimensions().x() * gy + gx; //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); //int id1 = gx+gy; //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; if(blockIdx.x==0) { Loading Loading @@ -774,7 +476,7 @@ __global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, tnlHost, int >, dou if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz) { solver->initGrid(); solver->initGrid(gx,gy,gz); } Loading
examples/fast-sweeping/tnlFastSweeping3D_impl.h +7 −224 Original line number Diff line number Diff line Loading @@ -66,7 +66,7 @@ bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: exactInput=false; else exactInput=true; cout << "bla "<<endl; // cout << "bla "<<endl; return initGrid(); } Loading @@ -78,231 +78,14 @@ template< typename MeshReal, typename Index > bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() { for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++) { dofVector2[i]=INT_MAX*Sign(dofVector[i]); if (abs(dofVector[i]) < 1.8*h) dofVector2[i]=dofVector[i]; else dofVector2[i]=INT_MAX*Sign(dofVector[i]); } cout << "bla 2"<<endl; // // for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) // { // for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1111(i,j); // else // setupSquare1110(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1101(i,j); // else // setupSquare1100(i,j); // } // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1011(i,j); // else // setupSquare1010(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare1001(i,j); // else // setupSquare1000(i,j); // } // } // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0111(i,j); // else // setupSquare0110(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0101(i,j); // else // setupSquare0100(i,j); // } // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))] > 0) // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0011(i,j); // else // setupSquare0010(i,j); // } // else // { // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j+1))] > 0) // setupSquare0001(i,j); // else // setupSquare0000(i,j); // } // } // } // // } // } // Real tmp = 0.0; // Real ax=0.5/sqrt(2.0); // // if(!exactInput) // { // for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) // dofVector[i]=0.5*h*Sign(dofVector[i]); // } // // // for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) // { // for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) // { // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // } // // // // for(int i = 1; i < Mesh.getDimensions().x()-1; i++) // { // Index j = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // for(int i = 1; i < Mesh.getDimensions().x()-1; i++) // { // Index j = Mesh.getDimensions().y() - 1; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // for(int j = 1; j < Mesh.getDimensions().y()-1; j++) // { // Index i = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // for(int j = 1; j < Mesh.getDimensions().y()-1; j++) // { // Index i = Mesh.getDimensions().x() - 1; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // // // if(tmp == 0.0) // {} // else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) // {} // else // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // } // // // Index i = Mesh.getDimensions().x() - 1; // Index j = Mesh.getDimensions().y() - 1; // // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // // // // j = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // // // // i = 0; // j = Mesh.getDimensions().y() -1; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; // // // // j = 0; // tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); // if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && // dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) // // dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; dofVector2.save("u-00000.tnl"); return true; } Loading Loading @@ -475,12 +258,12 @@ void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: dofVector2[Mesh.template getCellNextToCell<0,0,1>(index)] ); } Real hD= 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); if(hD >= 3.0*h) if(hD < 0.0) tmp = fabsMin(a,fabsMin(b,c)) + Sign(value)*h; else tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(3.0*h*h-hD) ); tmp = (1.0/3.0) * ( a + b + c + Sign(value)*sqrt(hD) ); dofVector2[index] = fabsMin(value, tmp); Loading
examples/fast-sweeping/tnlFastSweeping_CUDA.h +1 −1 Original line number Diff line number Diff line Loading @@ -134,7 +134,7 @@ public: __host__ bool run(); #ifdef HAVE_CUDA __device__ bool initGrid(); __device__ bool initGrid(int i, int j, int k); __device__ void updateValue(const Index i, const Index j, const Index k); __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); __device__ Real fabsMin(const Real x, const Real y); Loading