Loading examples/fast-sweeping/fastSweepingConfig.h +1 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ class fastSweepingConfig config.addEntry < tnlString > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); config.addRequiredEntry < tnlString > ( "initial-condition", "Initial condition for solver"); config.addEntry < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" ); config.addEntry < tnlString > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); } }; Loading examples/fast-sweeping/main.h +5 −1 Original line number Diff line number Diff line Loading @@ -16,7 +16,10 @@ #include "MainBuildConfig.h" #include "tnlFastSweeping.h" //for HOST versions: //#include "tnlFastSweeping.h" //for DEVICE versions: #include "tnlFastSweeping_CUDA.h" #include "fastSweepingConfig.h" #include <solvers/tnlConfigTags.h> Loading Loading @@ -46,6 +49,7 @@ int main( int argc, char* argv[] ) cerr << "Solver failed to initialize." << endl; return EXIT_FAILURE; } checkCudaDevice; cout << "-------------------------------------------------------------" << endl; cout << "Starting solver..." << endl; solver.run(); Loading examples/fast-sweeping/tnlFastSweeping.h +19 −3 Original line number Diff line number Diff line Loading @@ -24,6 +24,9 @@ #include <limits.h> #include <core/tnlDevice.h> #include <ctime> #ifdef HAVE_OPENMP #include <omp.h> #endif Loading Loading @@ -60,7 +63,12 @@ public: bool initGrid(); bool run(); void updateValue(const Index i, const Index j); //for single core version use this implementation: //void updateValue(const Index i, const Index j); //for parallel version use this one instead: void updateValue(const Index i, const Index j, DofVectorType* grid); Real fabsMin(const Real x, const Real y); Loading @@ -68,14 +76,22 @@ protected: MeshType Mesh; bool exactInput; DofVectorType dofVector; RealType h; #ifdef HAVE_OPENMP // omp_lock_t* gridLock; #endif }; }; #include "tnlFastSweeping2D_impl.h" //for single core version use this implementation: //#include "tnlFastSweeping2D_impl.h" //for parallel version use this one instead: #include "tnlFastSweeping2D_openMP_impl.h" #endif /* TNLFASTSWEEPING_H_ */ examples/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h 0 → 100644 +522 −0 Original line number Diff line number Diff line /*************************************************************************** tnlFastSweeping_impl.h - description ------------------- begin : Oct 15 , 2015 copyright : (C) 2015 by Tomas Sobotik ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef TNLFASTSWEEPING2D_IMPL_H_ #define TNLFASTSWEEPING2D_IMPL_H_ #include "tnlFastSweeping.h" template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > tnlString tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() { return tnlString( "tnlFastSweeping< " ) + MeshType::getType() + ", " + ::getType< Real >() + ", " + ::getType< Index >() + " >"; } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters ) { const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" ); if( ! Mesh.load( meshFile ) ) { cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl; return false; } const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition"); if( ! dofVector.load( initialCondition ) ) { cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl; return false; } h = Mesh.getHx(); counter = 0; const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" ); if(exact_input == "no") exactInput=false; else exactInput=true; #ifdef HAVE_CUDA cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); #endif int n = Mesh.getDimensions().x(); dim3 threadsPerBlock(16, 16); dim3 numBlocks(n/16 + 1 ,n/16 +1); initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); cudaDeviceSynchronize(); checkCudaDevice; return true; } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() { // // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // // dofVector.save("u-00001.tnl"); int n = Mesh.getDimensions().x(); dim3 threadsPerBlock(32, 32); dim3 numBlocks(n/32 + 1 ,n/32 +1); for(int i = 2*n - 1; i > -1; i--) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; for(int i = 0; i < 2*n ; i++) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; for(int i = 0; i < 2*n ; i++) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; for(int i = 2*n - 1; i > -1; i--) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFree(cudaDofVector); cudaFree(cudaDofVector2); cudaFree(cudaSolver); dofVector.save("u-00001.tnl"); cudaDeviceSynchronize(); return true; } #ifdef HAVE_CUDA template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > __device__ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) { Index index = Mesh.getCellIndex(CoordinatesType(i,j)); Real value = cudaDofVector[index]; Real a,b, tmp; if( i == 0 ) a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; else if( i == Mesh.getDimensions().x() - 1 ) a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; else { a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); } if( j == 0 ) b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; else if( j == Mesh.getDimensions().y() - 1 ) b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; else { b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); } if(abs(a-b) >= h) tmp = fabsMin(a,b) + Sign(value)*h; else tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); cudaDofVector[index] = fabsMin(value, tmp); } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > __device__ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() { int gx = threadIdx.x + blockDim.x*blockIdx.x; int gy = blockDim.y*blockIdx.y + threadIdx.y; int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); int total = blockDim.x*gridDim.x; Real tmp = 0.0; int flag = 0; counter = 0; tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(!exactInput) { cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*Sign(cudaDofVector[gid]); } __threadfence(); // printf("-----------------------------------------------------------------------------------\n"); __threadfence(); if(gx > 0 && gx < Mesh.getDimensions().x()-1) { if(gy > 0 && gy < Mesh.getDimensions().y()-1) { Index j = gy; Index i = gx; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } } // printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); // printf("****************************************************************\n"); // printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) { // printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); Index j = 0; Index i = gx; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) { Index i = gx; Index j = Mesh.getDimensions().y() - 1; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) { Index j = gy; Index i = 0; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) { Index j = gy; Index i = Mesh.getDimensions().x() - 1; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("##################################################################################################\n"); if(gx == Mesh.getDimensions().x() - 1 && gy == Mesh.getDimensions().y() - 1) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } if(gx == Mesh.getDimensions().x() - 1 && gy == 0) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } // printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); if(gx == 0 && gy == Mesh.getDimensions().y() - 1) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } if(gx == 0 && gy == 0) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } __threadfence(); if(flag==1) cudaDofVector[gid] = tmp*3; } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > __device__ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) { Real fx = abs(x); Real fy = abs(y); Real tmpMin = Min(fx,fy); if(tmpMin == fx) return x; else return y; } __global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i) { int gx = threadIdx.x + blockDim.x*blockIdx.x; int gy = blockDim.y*blockIdx.y + threadIdx.y; if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) return; int total = solver->Mesh.getDimensions().x(); //int gid = solver->Mesh.getDimensions().x() * gy + gx; int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); int id1 = gx+gy; int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; /*---------------------------------------------------------------------------------------------------------------------------*/ if(sweep == 1) // for(int i = 0; i < 2*total - 1; i++) { if(id1 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ else if(sweep == 2) // for(int i = 0; i < 2*total - 1; i++) { if(id2 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ else if(sweep == 3) // for(int i = 2*total - 2; i > -1; i--) { if(id1 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ else if(sweep == 4) // for(int i = 2*total - 2; i > -1; i--) { if(id2 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ } __global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver) { int gx = threadIdx.x + blockDim.x*blockIdx.x; int gy = blockDim.y*blockIdx.y + threadIdx.y; if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) { solver->initGrid(); } } #endif #endif /* TNLFASTSWEEPING_IMPL_H_ */ examples/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h 0 → 100644 +588 −0 File added.Preview size limit exceeded, changes collapsed. Show changes Loading
examples/fast-sweeping/fastSweepingConfig.h +1 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ class fastSweepingConfig config.addEntry < tnlString > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); config.addRequiredEntry < tnlString > ( "initial-condition", "Initial condition for solver"); config.addEntry < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" ); config.addEntry < tnlString > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); } }; Loading
examples/fast-sweeping/main.h +5 −1 Original line number Diff line number Diff line Loading @@ -16,7 +16,10 @@ #include "MainBuildConfig.h" #include "tnlFastSweeping.h" //for HOST versions: //#include "tnlFastSweeping.h" //for DEVICE versions: #include "tnlFastSweeping_CUDA.h" #include "fastSweepingConfig.h" #include <solvers/tnlConfigTags.h> Loading Loading @@ -46,6 +49,7 @@ int main( int argc, char* argv[] ) cerr << "Solver failed to initialize." << endl; return EXIT_FAILURE; } checkCudaDevice; cout << "-------------------------------------------------------------" << endl; cout << "Starting solver..." << endl; solver.run(); Loading
examples/fast-sweeping/tnlFastSweeping.h +19 −3 Original line number Diff line number Diff line Loading @@ -24,6 +24,9 @@ #include <limits.h> #include <core/tnlDevice.h> #include <ctime> #ifdef HAVE_OPENMP #include <omp.h> #endif Loading Loading @@ -60,7 +63,12 @@ public: bool initGrid(); bool run(); void updateValue(const Index i, const Index j); //for single core version use this implementation: //void updateValue(const Index i, const Index j); //for parallel version use this one instead: void updateValue(const Index i, const Index j, DofVectorType* grid); Real fabsMin(const Real x, const Real y); Loading @@ -68,14 +76,22 @@ protected: MeshType Mesh; bool exactInput; DofVectorType dofVector; RealType h; #ifdef HAVE_OPENMP // omp_lock_t* gridLock; #endif }; }; #include "tnlFastSweeping2D_impl.h" //for single core version use this implementation: //#include "tnlFastSweeping2D_impl.h" //for parallel version use this one instead: #include "tnlFastSweeping2D_openMP_impl.h" #endif /* TNLFASTSWEEPING_H_ */
examples/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h 0 → 100644 +522 −0 Original line number Diff line number Diff line /*************************************************************************** tnlFastSweeping_impl.h - description ------------------- begin : Oct 15 , 2015 copyright : (C) 2015 by Tomas Sobotik ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef TNLFASTSWEEPING2D_IMPL_H_ #define TNLFASTSWEEPING2D_IMPL_H_ #include "tnlFastSweeping.h" template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > tnlString tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() { return tnlString( "tnlFastSweeping< " ) + MeshType::getType() + ", " + ::getType< Real >() + ", " + ::getType< Index >() + " >"; } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters ) { const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" ); if( ! Mesh.load( meshFile ) ) { cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl; return false; } const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition"); if( ! dofVector.load( initialCondition ) ) { cerr << "I am not able to load the initial condition from the file " << meshFile << "." << endl; return false; } h = Mesh.getHx(); counter = 0; const tnlString& exact_input = parameters.getParameter< tnlString >( "exact-input" ); if(exact_input == "no") exactInput=false; else exactInput=true; #ifdef HAVE_CUDA cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); #endif int n = Mesh.getDimensions().x(); dim3 threadsPerBlock(16, 16); dim3 numBlocks(n/16 + 1 ,n/16 +1); initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); cudaDeviceSynchronize(); checkCudaDevice; return true; } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() { // // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = 0; j < Mesh.getDimensions().y(); j++) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // for(Index i = 0; i < Mesh.getDimensions().x(); i++) // { // for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) // { // updateValue(i,j); // } // } // ///*---------------------------------------------------------------------------------------------------------------------------*/ // // // dofVector.save("u-00001.tnl"); int n = Mesh.getDimensions().x(); dim3 threadsPerBlock(32, 32); dim3 numBlocks(n/32 + 1 ,n/32 +1); for(int i = 2*n - 1; i > -1; i--) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; for(int i = 0; i < 2*n ; i++) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; for(int i = 0; i < 2*n ; i++) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; for(int i = 2*n - 1; i > -1; i--) { runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,i); cudaDeviceSynchronize(); } cudaDeviceSynchronize(); checkCudaDevice; cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFree(cudaDofVector); cudaFree(cudaDofVector2); cudaFree(cudaSolver); dofVector.save("u-00001.tnl"); cudaDeviceSynchronize(); return true; } #ifdef HAVE_CUDA template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > __device__ void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) { Index index = Mesh.getCellIndex(CoordinatesType(i,j)); Real value = cudaDofVector[index]; Real a,b, tmp; if( i == 0 ) a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; else if( i == Mesh.getDimensions().x() - 1 ) a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; else { a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); } if( j == 0 ) b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; else if( j == Mesh.getDimensions().y() - 1 ) b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; else { b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); } if(abs(a-b) >= h) tmp = fabsMin(a,b) + Sign(value)*h; else tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); cudaDofVector[index] = fabsMin(value, tmp); } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > __device__ bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() { int gx = threadIdx.x + blockDim.x*blockIdx.x; int gy = blockDim.y*blockIdx.y + threadIdx.y; int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); int total = blockDim.x*gridDim.x; Real tmp = 0.0; int flag = 0; counter = 0; tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(!exactInput) { cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*Sign(cudaDofVector[gid]); } __threadfence(); // printf("-----------------------------------------------------------------------------------\n"); __threadfence(); if(gx > 0 && gx < Mesh.getDimensions().x()-1) { if(gy > 0 && gy < Mesh.getDimensions().y()-1) { Index j = gy; Index i = gx; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } } // printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); // printf("****************************************************************\n"); // printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) { // printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); Index j = 0; Index i = gx; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) { Index i = gx; Index j = Mesh.getDimensions().y() - 1; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) { Index j = gy; Index i = 0; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) { Index j = gy; Index i = Mesh.getDimensions().x() - 1; // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); if(tmp == 0.0) {} else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) {} else flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; } // printf("##################################################################################################\n"); if(gx == Mesh.getDimensions().x() - 1 && gy == Mesh.getDimensions().y() - 1) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } if(gx == Mesh.getDimensions().x() - 1 && gy == 0) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } // printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); if(gx == 0 && gy == Mesh.getDimensions().y() - 1) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } if(gx == 0 && gy == 0) { // tmp = Sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; } __threadfence(); if(flag==1) cudaDofVector[gid] = tmp*3; } template< typename MeshReal, typename Device, typename MeshIndex, typename Real, typename Index > __device__ Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) { Real fx = abs(x); Real fy = abs(y); Real tmpMin = Min(fx,fy); if(tmpMin == fx) return x; else return y; } __global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver, int sweep, int i) { int gx = threadIdx.x + blockDim.x*blockIdx.x; int gy = blockDim.y*blockIdx.y + threadIdx.y; if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) return; int total = solver->Mesh.getDimensions().x(); //int gid = solver->Mesh.getDimensions().x() * gy + gx; int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); int id1 = gx+gy; int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; /*---------------------------------------------------------------------------------------------------------------------------*/ if(sweep == 1) // for(int i = 0; i < 2*total - 1; i++) { if(id1 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ else if(sweep == 2) // for(int i = 0; i < 2*total - 1; i++) { if(id2 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ else if(sweep == 3) // for(int i = 2*total - 2; i > -1; i--) { if(id1 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ else if(sweep == 4) // for(int i = 2*total - 2; i > -1; i--) { if(id2 == i) { solver->updateValue(gx,gy); return; } } /*---------------------------------------------------------------------------------------------------------------------------*/ } __global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, tnlHost, int >, double, int >* solver) { int gx = threadIdx.x + blockDim.x*blockIdx.x; int gy = blockDim.y*blockIdx.y + threadIdx.y; if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) { solver->initGrid(); } } #endif #endif /* TNLFASTSWEEPING_IMPL_H_ */
examples/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h 0 → 100644 +588 −0 File added.Preview size limit exceeded, changes collapsed. Show changes