diff --git a/examples/fast-sweeping-map/main.h b/examples/fast-sweeping-map/main.h index 4aee944c0f2403f5f80ab6df01d5a9df250478ca..6d88aa7d340288b7b4052f176a4a4f041ffaf7f5 100644 --- a/examples/fast-sweeping-map/main.h +++ b/examples/fast-sweeping-map/main.h @@ -17,9 +17,9 @@ #include "MainBuildConfig.h" //for HOST versions: -//#include "tnlFastSweepingMap.h" +#include "tnlFastSweepingMap.h" //for DEVICE versions: -#include "tnlFastSweepingMap_CUDA.h" +//#include "tnlFastSweepingMap_CUDA.h" #include "fastSweepingMapConfig.h" #include <solvers/tnlBuildConfigTags.h> diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap.h b/examples/fast-sweeping-map/tnlFastSweepingMap.h index 32d9537a827c9e5dc0c4dfaea748fc057f661b11..2efb869ad894a05d3755490e224ca0188f48559c 100644 --- a/examples/fast-sweeping-map/tnlFastSweepingMap.h +++ b/examples/fast-sweeping-map/tnlFastSweepingMap.h @@ -99,8 +99,10 @@ protected: bool exactInput; + int something_changed; + tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data; + DofVectorType data,map; RealType h; diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h index c32f998519073220b072f42e518d0d10a0b9322f..23a1f350bd145e84c5b1e8c377f76b9af7c9f30b 100644 --- a/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h +++ b/examples/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h @@ -247,7 +247,7 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - if(abs(value)-abs(tmp) > 0.1*h) + if(abs(value)-abs(tmp) > 0.0) atomicMax(something_changed,1); } else @@ -280,7 +280,11 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > cudaDofVector2[gid] = INT_MAX*Sign(cudaDofVector[gid]); if(abs(cudaDofVector[gid]) < 1.01*h) + { cudaDofVector2[gid] = cudaDofVector[gid]; + if(map_cuda[gid] != 0.0) + cudaDofVector2[gid] /=map_cuda[gid]; + } diff --git a/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h index edb798b2d606213bb855d30b1e23dce8fd0efdca..d2ccad30439cbbae83105441b174978ac5207fc4 100644 --- a/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h +++ b/examples/fast-sweeping-map/tnlFastSweepingMap2D_impl.h @@ -16,6 +16,10 @@ #ifndef TNLFASTSWEEPING2D_IMPL_H_ #define TNLFASTSWEEPING2D_IMPL_H_ + +#define MAP_SOLVER_MAX_VALUE 3 + + #include "tnlFastSweepingMap.h" template< typename MeshReal, @@ -70,6 +74,10 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > } dofVector2.load(initialCondition); + const tnlString& mapFile = parameters.getParameter <tnlString>("map"); + if(! this->map.load( mapFile )) + cout << "Failed to load map file : " << mapFile << endl; + h = Mesh.template getSpaceStepsProducts< 1, 0 >(); Entity.refresh(); @@ -81,6 +89,8 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > exactInput=true; cout << "a" << endl; + + something_changed = 1; return initGrid(); } @@ -97,226 +107,101 @@ bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) { dofVector2[i]=INT_MAX*Sign(dofVector[i]); - } - for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) - { - for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) - { - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - neighbourEntities.refresh(Mesh,Entity.getIndex()); - - if(dofVector[this->Entity.getIndex()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - setupSquare0100(i,j); - } - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } + if(abs(dofVector[i]) < 1.01*h) + { + dofVector2[i] = dofVector[i]; + if(map[i] != 0.0) + dofVector2[i] /= map[i]; + } } - cout << "a" << endl; -// Real tmp = 0.0; -// Real ax=0.5/sqrt(2.0); -// -// if(!exactInput) -// { -// for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) -// dofVector[i]=0.5*h*Sign(dofVector[i]); -// } -// -// -// for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// } -// -// -// -// for(int i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// Index j = 0; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int i = 1; i < Mesh.getDimensions().x()-1; i++) +// for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) // { -// Index j = Mesh.getDimensions().y() - 1; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; +// for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) +// { +// this->Entity.setCoordinates(CoordinatesType(i,j)); +// this->Entity.refresh(); +// neighbourEntities.refresh(Mesh,Entity.getIndex()); +// +// if(dofVector[this->Entity.getIndex()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 0 >()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare1111(i,j); +// else +// setupSquare1110(i,j); +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare1101(i,j); +// else +// setupSquare1100(i,j); +// } +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare1011(i,j); +// else +// setupSquare1010(i,j); +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare1001(i,j); +// else +// setupSquare1000(i,j); +// } +// } +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 0 >()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare0111(i,j); +// else +// setupSquare0110(i,j); +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare0101(i,j); +// else +// setupSquare0100(i,j); +// } +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 0, 1 >()] > 0) +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare0011(i,j); +// else +// setupSquare0010(i,j); +// } +// else +// { +// if(dofVector[neighbourEntities.template getEntityIndex< 1, 1 >()] > 0) +// setupSquare0001(i,j); +// else +// setupSquare0000(i,j); +// } +// } +// } +// +// } // } -// -// for(int j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// Index i = 0; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// Index i = Mesh.getDimensions().x() - 1; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// -// Index i = Mesh.getDimensions().x() - 1; -// Index j = Mesh.getDimensions().y() - 1; -// -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// i = 0; -// j = Mesh.getDimensions().y() -1; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = Sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; + cout << "a" << endl; //data.setLike(dofVector2.getData()); //data=dofVector2.getData(); @@ -336,45 +221,51 @@ template< typename MeshReal, typename Index > bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() { - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) + int cntr = 0; + while(something_changed != 0) { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) + something_changed = 0; + for(Index i = 0; i < Mesh.getDimensions().x(); i++) { - updateValue(i,j); + for(Index j = 0; j < Mesh.getDimensions().y(); j++) + { + updateValue(i,j); + } } - } -/*---------------------------------------------------------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) + for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) { - updateValue(i,j); + for(Index j = 0; j < Mesh.getDimensions().y(); j++) + { + updateValue(i,j); + } } - } -/*---------------------------------------------------------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) + for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) { - updateValue(i,j); + for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) + { + updateValue(i,j); + } } - } -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) + /*---------------------------------------------------------------------------------------------------------------------------*/ + for(Index i = 0; i < Mesh.getDimensions().x(); i++) { - updateValue(i,j); + for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) + { + updateValue(i,j); + } } - } -/*---------------------------------------------------------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------------------------------------------------------*/ + cntr++; + cout << "Finished set of sweeps #" << cntr << " " << something_changed << endl; + } // data.setLike(dofVector2.getData()); @@ -397,42 +288,50 @@ void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > this->Entity.setCoordinates(CoordinatesType(i,j)); this->Entity.refresh(); - tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity); + if(map[Entity.getIndex()] != 0.0) + { + tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity); - Real value = dofVector2[Entity.getIndex()]; - Real a,b, tmp; + Real value = dofVector2[Entity.getIndex()]; + Real im = abs(1.0/map[Entity.getIndex()]); + Real a,b, tmp; - if( i == 0 ) - a = dofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] ); - } + if( i == 0 ) + a = dofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()]; + else if( i == Mesh.getDimensions().x() - 1 ) + a = dofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()]; + else + { + a = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< -1, 0 >()], + dofVector2[neighbourEntities.template getEntityIndex< 1, 0 >()] ); + } - if( j == 0 ) - b = dofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] ); - } + if( j == 0 ) + b = dofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()]; + else if( j == Mesh.getDimensions().y() - 1 ) + b = dofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()]; + else + { + b = fabsMin( dofVector2[neighbourEntities.template getEntityIndex< 0, -1 >()], + dofVector2[neighbourEntities.template getEntityIndex< 0, 1 >()] ); + } - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + Sign(value)*h; - else - tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); + if(fabs(a-b) >= im*h) + tmp = fabsMin(a,b) + Sign(value)*im*h; + else + tmp = 0.5 * (a + b + Sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) ); + if(abs(value)-abs(tmp) > 0.0) + something_changed = 1; - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); + dofVector2[Entity.getIndex()] = fabsMin(value, tmp); -// if(dofVector2[Entity.getIndex()] > 1.0) -// cout << value << " " << tmp << " " << dofVector2[Entity.getIndex()] << endl; + } + else + { + dofVector2[Entity.getIndex()] = MAP_SOLVER_MAX_VALUE; + } } diff --git a/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h index 6c0e48681a1e5aa8072485d8bc1816ceb3386f83..8a5b21d3e701ed7e7db3d2bc0b078b003b6e1141 100644 --- a/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h +++ b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h @@ -24,7 +24,7 @@ -#define MAP_SOLVER_MAX_VALUE 150 +#define MAP_SOLVER_MAX_VALUE 3 @@ -32,7 +32,7 @@ template< typename SchemeHost, typename SchemeDevice, typename Device> tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver() { cout << "a" << endl; - this->device = tnlHostDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU + this->device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU #ifdef HAVE_CUDA if(this->device == tnlCudaDevice) @@ -1023,11 +1023,13 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; /* LOAD MAP */ - int index = (blockIdx.y) * this->n*this->n*this->gridCols - + (blockIdx.x) * this->n - + threadIdx.y * this->n*this->gridCols - + threadIdx.x; - map_local[l]=this->map_stretched_cuda[index]; +// int index = (blockIdx.y) * this->n*this->n*this->gridCols +// + (blockIdx.x) * this->n +// + threadIdx.y * this->n*this->gridCols +// + threadIdx.x; + map_local[l]=this->map_stretched_cuda[gid]; + if(map_local[l] != 0.0) + map_local[l] = 1.0/map_local[l]; /* LOADED */ bool computeFU = !((i == 0 && (boundaryCondition & 4)) or @@ -1074,13 +1076,13 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS if(computeFU) { if(boundaryCondition == 4) - u[l] = u[threadIdx.y * blockDim.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n); + u[l] = u[threadIdx.y * blockDim.x] ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n); else if(boundaryCondition == 2) - u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); + u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); else if(boundaryCondition == 8) - u[l] = u[threadIdx.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n); + u[l] = u[threadIdx.x] ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n); else if(boundaryCondition == 1) - u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); + u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); } } @@ -1152,8 +1154,9 @@ void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runS if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); __syncthreads(); - if(computeFU) +// if(computeFU) u[l] += currentTau * fu; + // if(abs(u[l]) > MAP_SOLVER_MAX_VALUE) // { // u[l] = /*Sign(u[l])**/MAP_SOLVER_MAX_VALUE; @@ -1309,127 +1312,11 @@ void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/ if(blockIdx.x+blockIdx.y ==0) { - cudaSolver->currentStep = cudaSolver->currentStep + 1; + cudaSolver->currentStep += 1; *(cudaSolver->runcuda) = 0; } -// -// int stepValue = cudaSolver->currentStep + 4; -// if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) -// cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); -// -// atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); - } - - - /* - //printf("I am not an empty kernel!\n"); - //cout << "Synchronizig..." << endl; - int tmp1, tmp2; - int grid1, grid2; - - if(cudaSolver->currentStep & 1) - { - //printf("I am not an empty kernel! 1\n"); - for(int j = 0; j < cudaSolver->gridRows - 1; j++) - { - //printf("I am not an empty kernel! 3\n"); - for (int i = 0; i < cudaSolver->gridCols*cudaSolver->n; i++) - { - tmp1 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n-1)+j*cudaSolver->n) + i; - tmp2 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n)+j*cudaSolver->n) + i; - grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); - grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); - - if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; - cudaSolver->unusedCell_cuda[tmp2] = 0; - if(grid2 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 8) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+8); - } - else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; - cudaSolver->unusedCell_cuda[tmp1] = 0; - if(grid1 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 1) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+1); - } - } - } - } - else - { - //printf("I am not an empty kernel! 2\n"); - for(int i = 1; i < cudaSolver->gridCols; i++) - { - //printf("I am not an empty kernel! 4\n"); - for (int j = 0; j < cudaSolver->gridRows*cudaSolver->n; j++) - { - - tmp1 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n - 1; - tmp2 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n ; - grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); - grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); - if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; - cudaSolver->unusedCell_cuda[tmp2] = 0; - if(grid2 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 4) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+4); - } - else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; - cudaSolver->unusedCell_cuda[tmp1] = 0; - if(grid1 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 2) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+2); - } - } - } - } - //printf("I am not an empty kernel! 5 cudaSolver->currentStep : %d \n", cudaSolver->currentStep); - - cudaSolver->currentStep = cudaSolver->currentStep + 1; - int stepValue = cudaSolver->currentStep + 4; - for (int i = 0; i < cudaSolver->gridRows * cudaSolver->gridCols; i++) - { - if( cudaSolver->getSubgridValueCUDA2D(i) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(i, stepValue); - } - - int maxi = 0; - for(int q=0; q < cudaSolver->gridRows*cudaSolver->gridCols;q++) - { - //printf("%d : %d\n", q, cudaSolver->boundaryConditions_cuda[q]); - maxi=Max(maxi,cudaSolver->getBoundaryConditionCUDA2D(q)); - } - //printf("I am not an empty kernel! %d\n", maxi); - *(cudaSolver->runcuda) = (maxi > 0); - //printf("I am not an empty kernel! 7 %d\n", cudaSolver->boundaryConditions_cuda[0]); - //cout << "Grid synchronized at step " << (this->currentStep - 1 ) << endl; -*/ } @@ -1438,11 +1325,7 @@ template <typename SchemeHost, typename SchemeDevice, typename Device> __global__ void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) { -// if(blockIdx.x+blockIdx.y ==0) -// { -// cudaSolver->currentStep = cudaSolver->currentStep + 1; -// *(cudaSolver->runcuda) = 0; -// } + int stepValue = cudaSolver->currentStep + 4; if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) @@ -1460,54 +1343,18 @@ void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, template< typename SchemeHost, typename SchemeDevice, typename Device> __global__ -void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr) +void initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr) { - //cout << "Initializating solver..." << endl; - //const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh"); - //this->mesh_cuda.load( meshLocation ); - - //this->n_cuda = parameters.getParameter <int>("subgrid-size"); - //cout << "Setting N << this->n_cuda << endl; - - //this->subMesh_cuda.setDimensions( this->n_cuda, this->n_cuda ); - //this->subMesh_cuda.setDomain( tnlStaticVector<2,double>(0.0, 0.0), - //tnlStaticVector<2,double>(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n_cuda), this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n_cuda)) ); - //this->subMesh_cuda.save("submesh.tnl"); -// const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition"); -// this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) << endl; - - //this->delta_cuda = parameters.getParameter <double>("delta"); - //this->delta_cuda *= this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >(); - - //cout << "Setting delta to " << this->delta << endl; - - //this->tau0_cuda = parameters.getParameter <double>("initial-tau"); - //cout << "Setting initial tau to " << this->tau0_cuda << endl; - //this->stopTime_cuda = parameters.getParameter <double>("stop-time"); - - //this->cflCondition_cuda = parameters.getParameter <double>("cfl-condition"); - //this -> cflCondition_cuda *= sqrt(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()); - //cout << "Setting CFL to " << this->cflCondition << endl; -//// -//// - -// this->gridRows_cuda = gridRows; -// this->gridCols_cuda = gridCols; - - cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double)); + cudaSolver->work_u_cuda = ptr; cudaSolver->map_stretched_cuda = tmp_map_ptr; - cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int)); + cudaSolver->unusedCell_cuda = ptr3; cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool)); + cudaSolver->runcuda = ptr2; *(cudaSolver->runcuda) = 1; - cudaSolver->currentStep = 1; - //cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost); - //ptr = cudaSolver->work_u_cuda; +/* CHANGED !!!!!! from 1 to 0*/ cudaSolver->currentStep = 0; printf("GPU memory allocated.\n"); for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++) @@ -1516,51 +1363,18 @@ void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/ cudaSolver->boundaryConditions_cuda[i] = 0; } - /*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++) - { - printf("%d\n",j); - cudaSolver->unusedCell_cuda[ j] = 1; - }*/ printf("GPU memory initialized.\n"); - - - //cudaSolver->work_u_cuda[50] = 32.153438; -//// -//// - //stretchGrid(); - //this->stopTime_cuda /= (double)(this->gridCols_cuda); - //this->stopTime_cuda *= (1.0+1.0/((double)(this->n_cuda) - 1.0)); - //cout << "Setting stopping time to " << this->stopTime << endl; - //this->stopTime_cuda = 1.5*((double)(this->n_cuda))*parameters.getParameter <double>("stop-time")*this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >(); - //cout << "Setting stopping time to " << this->stopTime << endl; - - //cout << "Initializating scheme..." << endl; - //if(!this->schemeDevice.init(parameters)) -// { - //cerr << "Scheme failed to initialize." << endl; -// return false; -// } - //cout << "Scheme initialized." << endl; - - //test(); - -// this->currentStep_cuda = 1; - //return true; } -//extern __shared__ double array[]; template< typename SchemeHost, typename SchemeDevice, typename Device > __global__ -void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) +void initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) { - - extern __shared__ double u[]; - //printf("%p\n",caller->work_u_cuda); int i = blockIdx.y * gridDim.x + blockIdx.x; int l = threadIdx.y * blockDim.x + threadIdx.x; @@ -1569,28 +1383,17 @@ void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/ if(l == 0) containsCurve = 0; - //double a; + caller->getSubgridCUDA2D(i,caller, &u[l]); - //printf("%f %f\n",a , u[l]); - //u[l] = a; - //printf("Hi %f \n", u[l]); __syncthreads(); - //printf("hurewrwr %f \n", u[l]); + if(u[0] * u[l] <= 0.0) - { - //printf("contains %d \n",i); atomicMax( &containsCurve, 1); - } __syncthreads(); - //printf("hu"); - //printf("%d : %f\n", l, u[l]); if(containsCurve == 1) { - //printf("have curve \n"); caller->runSubgridCUDA2D(0,u,i); - //printf("%d : %f\n", l, u[l]); - __syncthreads(); caller->insertSubgridCUDA2D(u[l],i); __syncthreads(); if(l == 0) @@ -1606,7 +1409,7 @@ void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/ template< typename SchemeHost, typename SchemeDevice, typename Device > __global__ -void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) +void runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) { extern __shared__ double u[]; int i = blockIdx.y * gridDim.x + blockIdx.x; @@ -1617,227 +1420,88 @@ void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/ { caller->getSubgridCUDA2D(i,caller, &u[l]); - //if(l == 0) - //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA2D(i)); + if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4) { if(bound & 1) { caller->runSubgridCUDA2D(1,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } - if(bound & 2 ) + if(bound & 2) { caller->runSubgridCUDA2D(2,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } if(bound & 4) { caller->runSubgridCUDA2D(4,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } if(bound & 8) { caller->runSubgridCUDA2D(8,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } - - - - - - if( ((bound & 3 ))) - { - caller->runSubgridCUDA2D(3,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 5 ))) - { - caller->runSubgridCUDA2D(5,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 10 ))) - { - caller->runSubgridCUDA2D(10,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 12 )) - { - caller->runSubgridCUDA2D(12,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - } - - else { - - - - - - - - - if( ((bound == 2))) - { - caller->runSubgridCUDA2D(2,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 1) )) - { - caller->runSubgridCUDA2D(1,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 8) )) - { - caller->runSubgridCUDA2D(8,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound == 4)) - { - caller->runSubgridCUDA2D(4,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - - - - - - if( ((bound & 3) )) + if(bound == 1) { - caller->runSubgridCUDA2D(3,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->runSubgridCUDA2D(1,u,i); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } - if( ((bound & 5) )) + if(bound == 2) { - caller->runSubgridCUDA2D(5,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->runSubgridCUDA2D(2,u,i); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } - if( ((bound & 10) )) + if(bound == 4) { - caller->runSubgridCUDA2D(10,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->runSubgridCUDA2D(4,u,i); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } - if( (bound & 12) ) + if(bound == 8) { - caller->runSubgridCUDA2D(12,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->runSubgridCUDA2D(8,u,i); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); } + } - - - - - - - - - - - + if(bound & 3) + { + caller->runSubgridCUDA2D(3,u,i); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); } - /*if( bound ) + if(bound & 5) { - caller->runSubgridCUDA2D(15,u,i); + caller->runSubgridCUDA2D(5,u,i); + caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); + } + if(bound & 10) + { + caller->runSubgridCUDA2D(10,u,i); caller->updateSubgridCUDA2D(i,caller, &u[l]); __syncthreads(); - }*/ + } + if(bound & 12) + { + caller->runSubgridCUDA2D(12,u,i); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if(l==0) { diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h index 7a762a09a9ce7c428c2d429f4db10ac7f70d30fb..203159dfa254c6a90199f1775af012abbaf7d1b3 100644 --- a/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h +++ b/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h @@ -462,12 +462,12 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re } - if(xb - xf > 0.0) + if(xb > xf) a = xb; else a = xf; - if(yb - yf > 0.0) + if(yb > yf) b = yb; else b = yf; diff --git a/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h b/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h index fc26c698b6d5c7ff7bc45a9c5a0b0ca4a6cc68fd..ea66c983165bc220c9c05ff056dbdf7ad74e4230 100644 --- a/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h +++ b/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h @@ -179,7 +179,7 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, // return 0.0; // } // else - value = 50.0/ map[cellIndex]; + value = 1.0/ map[cellIndex]; RealType xb = u[cellIndex]; @@ -291,13 +291,13 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, { // int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - RealType signui; +// RealType signui; // if(boundaryCondition == 0) - signui = sign(u[cellIndex],/*(boundaryCondition != 0) * */this->epsilon); +// signui = sign(u[cellIndex],/*(boundaryCondition != 0) * */this->epsilon); // else -// signui = Sign(u[cellIndex]); + RealType signui = Sign(u[cellIndex]); - RealType value; +// RealType value; // if(map[cellIndex] == 0.0) // { //// value = INT_MAX; @@ -305,7 +305,7 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, // return 0.0; // } // else - value = 50.0/ map[cellIndex]; + RealType value = /*1.0/*/ map[cellIndex]; RealType xb = u[cellIndex]; @@ -359,33 +359,32 @@ Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, yf = positivePart(yf); } - /*if(boundaryCondition &1) - yb = 0.0; - else - yf = 0.0; - - if(boundaryCondition & 2) - xb = 0.0; - else - xf = 0.0;*/ - - if(xb - xf > 0.0) +// if(boundaryCondition &1) +// yb = 0.0; +// else +// yf = 0.0; +// +// if(boundaryCondition & 2) +// xb = 0.0; +// else +// xf = 0.0; + + if(xb > xf) a = xb; else a = xf; - if(yb - yf > 0.0) + if(yb > yf) b = yb; else b = yf; - c =(value - sqrt(a*a+b*b)*ihx ); + c = (value - sqrt(a*a+b*b)*ihx ); - if(c > 0.0 ) - return Sign(u[cellIndex])*c; - else - - return signui*c; +// if(c > 0.0 ) +// return Sign(u[cellIndex])*c; +// else + return signui*c; }