From e7217db4a436509220b84019d4ee6b7d6c20a650 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Thu, 29 Nov 2018 14:02:03 +0100 Subject: [PATCH] Deleting legacy code. --- .../Legacy/fast-sweeping-map/CMakeLists.txt | 22 - .../fast-sweeping-map/MainBuildConfig.h | 64 - .../fast-sweeping-map/fastSweepingMapConfig.h | 39 - src/TNL/Legacy/fast-sweeping-map/main.cpp | 17 - src/TNL/Legacy/fast-sweeping-map/main.cu | 17 - src/TNL/Legacy/fast-sweeping-map/main.h | 88 - .../fast-sweeping-map/tnlFastSweepingMap.h | 188 -- .../tnlFastSweepingMap2D_CUDA_v4_impl.h | 1051 --------- .../tnlFastSweepingMap2D_impl.h | 823 ------- .../tnlFastSweepingMap_CUDA.h | 196 -- src/TNL/Legacy/fast-sweeping/CMakeLists.txt | 22 - .../Legacy/fast-sweeping/MainBuildConfig.h | 64 - .../Legacy/fast-sweeping/fastSweepingConfig.h | 38 - src/TNL/Legacy/fast-sweeping/main.cpp | 17 - src/TNL/Legacy/fast-sweeping/main.cu | 17 - src/TNL/Legacy/fast-sweeping/main.h | 88 - .../Legacy/fast-sweeping/tnlFastSweeping.h | 186 -- .../tnlFastSweeping2D_CUDA_impl.h | 522 ----- .../tnlFastSweeping2D_CUDA_v2_impl.h | 588 ----- .../tnlFastSweeping2D_CUDA_v3_impl.h | 920 -------- .../tnlFastSweeping2D_CUDA_v4_impl.h | 1003 --------- .../tnlFastSweeping2D_CUDA_v5_impl.h | 697 ------ .../fast-sweeping/tnlFastSweeping2D_impl.h | 927 -------- .../tnlFastSweeping2D_openMP_impl.h | 399 ---- .../tnlFastSweeping3D_CUDA_impl.h | 961 -------- .../fast-sweeping/tnlFastSweeping3D_impl.h | 307 --- .../fast-sweeping/tnlFastSweepingSolver.h | 36 - .../fast-sweeping/tnlFastSweeping_CUDA.h | 194 -- .../CMakeLists.txt | 23 - .../MainBuildConfig.h | 64 - .../hamilton-jacobi-parallel-map/gnuplot.txt | 32 - .../hamilton-jacobi-parallel-map/main.cpp | 17 - .../hamilton-jacobi-parallel-map/main.cu | 17 - .../hamilton-jacobi-parallel-map/main.h | 98 - .../hamilton-jacobi-parallel-map/mapa_png.png | Bin 24841 -> 0 bytes .../hamilton-jacobi-parallel-map/no-Makefile | 41 - .../parallelMapConfig.h | 47 - .../Legacy/hamilton-jacobi-parallel-map/run | 43 - .../tnl-err2eoc-2.py | 141 -- .../tnlParallelMapSolver.h | 217 -- .../tnlParallelMapSolver2D_impl.h | 1315 ----------- .../hamilton-jacobi-parallel/CMakeLists.txt | 23 - .../MainBuildConfig.h | 64 - .../Legacy/hamilton-jacobi-parallel/main.cpp | 17 - .../Legacy/hamilton-jacobi-parallel/main.cu | 17 - .../Legacy/hamilton-jacobi-parallel/main.h | 142 -- .../hamilton-jacobi-parallel/no-Makefile | 41 - .../parallelEikonalConfig.h | 46 - src/TNL/Legacy/hamilton-jacobi-parallel/run | 64 - .../hamilton-jacobi-parallel/tnl-err2eoc-2.py | 141 -- .../tnlParallelEikonalSolver.h | 366 ---- .../tnlParallelEikonalSolver2D_impl.h | 1928 ----------------- .../tnlParallelEikonalSolver3D_impl.h | 1706 --------------- src/TNL/Legacy/narrow-band/CMakeLists.txt | 22 - src/TNL/Legacy/narrow-band/MainBuildConfig.h | 64 - src/TNL/Legacy/narrow-band/main.cpp | 17 - src/TNL/Legacy/narrow-band/main.cu | 17 - src/TNL/Legacy/narrow-band/main.h | 88 - src/TNL/Legacy/narrow-band/narrowBandConfig.h | 40 - src/TNL/Legacy/narrow-band/tnlNarrowBand.h | 186 -- .../tnlNarrowBand2D_CUDA_v4_impl.h | 1317 ----------- .../tnlNarrowBand2D_CUDA_v5_impl.h | 1313 ----------- .../Legacy/narrow-band/tnlNarrowBand2D_impl.h | 927 -------- .../narrow-band/tnlNarrowBand3D_CUDA_impl.h | 961 -------- .../Legacy/narrow-band/tnlNarrowBand3D_impl.h | 307 --- .../Legacy/narrow-band/tnlNarrowBand_CUDA.h | 203 -- 66 files changed, 21563 deletions(-) delete mode 100644 src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt delete mode 100644 src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/main.cpp delete mode 100644 src/TNL/Legacy/fast-sweeping-map/main.cu delete mode 100644 src/TNL/Legacy/fast-sweeping-map/main.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h delete mode 100644 src/TNL/Legacy/fast-sweeping/CMakeLists.txt delete mode 100644 src/TNL/Legacy/fast-sweeping/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h delete mode 100644 src/TNL/Legacy/fast-sweeping/main.cpp delete mode 100644 src/TNL/Legacy/fast-sweeping/main.cu delete mode 100644 src/TNL/Legacy/fast-sweeping/main.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h delete mode 100644 src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel-map/run delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/main.cu delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/main.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel/run delete mode 100755 src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h delete mode 100644 src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/CMakeLists.txt delete mode 100644 src/TNL/Legacy/narrow-band/MainBuildConfig.h delete mode 100644 src/TNL/Legacy/narrow-band/main.cpp delete mode 100644 src/TNL/Legacy/narrow-band/main.cu delete mode 100644 src/TNL/Legacy/narrow-band/main.h delete mode 100644 src/TNL/Legacy/narrow-band/narrowBandConfig.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h delete mode 100644 src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h diff --git a/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt deleted file mode 100644 index 3f9db0da04..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -set( tnl_fast_sweeping_map_SOURCES -# MainBuildConfig.h -# tnlFastSweepingMap2D_impl.h -# tnlFastSweepingMap.h -# fastSweepingMapConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(fast-sweeping-map main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(fast-sweeping-map main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (fast-sweeping-map tnl ) - - -INSTALL( TARGETS fast-sweeping-map - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_fast_sweeping_map_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/fast-sweeping-map ) diff --git a/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h deleted file mode 100644 index ed3d686eb9..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include <solvers/tnlBuildConfigTags.h> - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" <<std::endl; } -}; - -/**** - * Turn off support for float and long double. - */ -template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h b/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h deleted file mode 100644 index 9251deca87..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/fastSweepingMapConfig.h +++ /dev/null @@ -1,39 +0,0 @@ -/*************************************************************************** - fastSweepingConfig.h - description - ------------------- - begin : Oct 15, 2015 - copyright : (C) 2015 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef FASTSWEEPINGCONFIG_H_ -#define FASTSWEEPINGCONFIG_H_ - -#include <config/tnlConfigDescription.h> - -template< typename ConfigTag > -class fastSweepingMapConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); - config.addRequiredEntry < String > ( "map", "Gradient map for solver"); - } -}; - -#endif /* FASTSWEEPINGCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/main.cpp b/src/TNL/Legacy/fast-sweeping-map/main.cpp deleted file mode 100644 index 8849008ff6..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping-map/main.cu b/src/TNL/Legacy/fast-sweeping-map/main.cu deleted file mode 100644 index 8849008ff6..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping-map/main.h b/src/TNL/Legacy/fast-sweeping-map/main.h deleted file mode 100644 index 6f23851c2e..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/main.h +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - - -#include "MainBuildConfig.h" - //for HOST versions: -#include "tnlFastSweepingMap.h" - //for DEVICE versions: -//#include "tnlFastSweepingMap_CUDA.h" -#include "fastSweepingMapConfig.h" -#include <solvers/tnlBuildConfigTags.h> - -#include <mesh/tnlGrid.h> -#include <core/tnlDevice.h> -#include <time.h> -#include <ctime> - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - fastSweepingMapConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - tnlFastSweepingMap<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - TNL_CHECK_CUDA_DEVICE; - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver..." <<std::endl; - solver.run(); - } -// else if(dim == 3) -// { -// tnlFastSweepingMap<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver; -// if(!solver.init(parameters)) -// { -// cerr << "Solver failed to initialize." <<std::endl; -// return EXIT_FAILURE; -// } -// TNL_CHECK_CUDA_DEVICE; -// std::cout << "-------------------------------------------------------------" <<std::endl; -// std::cout << "Starting solver..." <<std::endl; -// solver.run(); -// } - else - { - std::cerr << "Unsupported number of dimensions: " << dim << "!" <<std::endl; - return EXIT_FAILURE; - } - - - time(&stop); - std::cout << "Solver stopped..." <<std::endl; - std::cout <<std::endl; - std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl; - return EXIT_SUCCESS; -} - - diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h deleted file mode 100644 index c568329ba2..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap.h +++ /dev/null @@ -1,188 +0,0 @@ -/*************************************************************************** - tnlFastSweepingMap.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING_H_ -#define TNLFASTSWEEPING_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <functions/tnlMeshFunction.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> -#include <limits.h> -#include <core/tnlDevice.h> -#include <ctime> -#ifdef HAVE_OPENMP -#include <omp.h> -#endif - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweepingMap -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - tnlFastSweepingMap(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - - void setupSquare1000(Index i, Index j); - void setupSquare1100(Index i, Index j); - void setupSquare1010(Index i, Index j); - void setupSquare1001(Index i, Index j); - void setupSquare1110(Index i, Index j); - void setupSquare1101(Index i, Index j); - void setupSquare1011(Index i, Index j); - void setupSquare1111(Index i, Index j); - void setupSquare0000(Index i, Index j); - void setupSquare0100(Index i, Index j); - void setupSquare0010(Index i, Index j); - void setupSquare0001(Index i, Index j); - void setupSquare0110(Index i, Index j); - void setupSquare0101(Index i, Index j); - void setupSquare0011(Index i, Index j); - void setupSquare0111(Index i, Index j); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - int something_changed; - - tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data,map; - - RealType h; - - tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweepingMap(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j, const Index k); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - - tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity; - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - //for single core version use this implementation: -#include "tnlFastSweepingMap2D_impl.h" - //for parallel version use this one instead: -// #include "tnlFastSweepingMap2D_openMP_impl.h" - -// #include "tnlFastSweepingMap3D_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h deleted file mode 100644 index d02b8d6c5d..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_CUDA_v4_impl.h +++ /dev/null @@ -1,1051 +0,0 @@ -/*************************************************************************** - tnlFastSweepingMap2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweepingMap.h" - -#define MAP_SOLVER_MAX_VALUE 3 - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweepingMap< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap() -:dofVector(Mesh) -{ -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - const String& mapFile = parameters.getParameter <String>("map"); - if(! this->map.load( mapFile )) - cout << "Failed to load map file : " << mapFile <<std::endl; - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(map_cuda), this->map.getSize()*sizeof(double)); - cudaMemcpy(map_cuda, this->map.getData(), this->map.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(changed), sizeof(int)); - //counter == 0 --> setting changed to 0 - cudaMemcpy(changed, &counter, sizeof(int), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 1024); - dim3 numBlocks(4,1); - - int run = 1; - int zero = 0; - int cntr = 0; - - while(run != 0) - { - cudaMemcpy(this->changed, &zero, sizeof(int), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0, this->changed); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(&run, this->changed,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cntr++; - cout << "Finished set of sweeps #" << cntr << " " << run <<std::endl; - } - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index* something_changed) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - - if(map_cuda[Entity.getIndex()] != 0.0) - { - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real im = abs(1.0/map_cuda[Entity.getIndex()]); - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= im*h) - tmp = fabsMin(a,b) + sign(value)*im*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) ); - - // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - - if(abs(value)-abs(tmp) > 0.0) - atomicMax(something_changed,1); - } - else - { - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), MAP_SOLVER_MAX_VALUE); - } - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); - - if(abs(cudaDofVector[gid]) < 1.01*h) - { - cudaDofVector2[gid] = cudaDofVector[gid]; - if(map_cuda[gid] != 0.0) - cudaDofVector2[gid] /=map_cuda[gid]; - } - - - - - -// if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) -// { -// if(cudaDofVector[Entity.getIndex()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed) -{ - - __shared__ int something_changed; - if(threadIdx.x+threadIdx.y == 0) - something_changed = 0; - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - __syncthreads(); - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,&something_changed); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,&something_changed); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,&something_changed); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,&something_changed); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - - - if(threadIdx.x+threadIdx.y == 0) - atomicMax(changed, something_changed); - - - - -} - - -__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h deleted file mode 100644 index 4bd9e17c56..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap2D_impl.h +++ /dev/null @@ -1,823 +0,0 @@ -/*************************************************************************** - tnlFastSweepingMap2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - - -#define MAP_SOLVER_MAX_VALUE 3 - - -#include "tnlFastSweepingMap.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweepingMap< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweepingMap() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - dofVector2.load(initialCondition); - - const String& mapFile = parameters.getParameter <String>("map"); - if(! this->map.load( mapFile )) - cout << "Failed to load map file : " << mapFile <<std::endl; - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - cout << "a" <<std::endl; - - something_changed = 1; - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) - { - dofVector2[i]=INT_MAX*sign(dofVector[i]); - - if(abs(dofVector[i]) < 1.01*h) - { - dofVector2[i] = dofVector[i]; - if(map[i] != 0.0) - dofVector2[i] /= map[i]; - } - } - -// for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) -// { -// for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) -// { -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// neighborEntities.refresh(Mesh,Entity.getIndex()); -// -// if(dofVector[this->Entity.getIndex()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } -// } - cout << "a" <<std::endl; - - //data.setLike(dofVector2.getData()); - //data=dofVector2.getData(); - //cout << data.getType() <<std::endl; - dofVector2.save("u-00000.tnl"); - //dofVector2.getData().save("u-00000.tnl"); - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - int cntr = 0; - while(something_changed != 0) - { - something_changed = 0; - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - - /*---------------------------------------------------------------------------------------------------------------------------*/ - cntr++; - cout << "Finished set of sweeps #" << cntr << " " << something_changed <<std::endl; - } - - - - dofVector2.save("u-00001.tnl"); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - if(map[Entity.getIndex()] != 0.0) - { - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - Real value = dofVector2[Entity.getIndex()]; - Real im = abs(1.0/map[Entity.getIndex()]); - Real a,b, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(fabs(a-b) >= im*h) - tmp = fabsMin(a,b) + sign(value)*im*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * im * h * im * h - (a - b) * (a - b) ) ); - - if(abs(value)-abs(tmp) > 0.0) - something_changed = 1; - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); - - } - else - { - dofVector2[Entity.getIndex()] = MAP_SOLVER_MAX_VALUE; - } -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h b/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h deleted file mode 100644 index a23057e78c..0000000000 --- a/src/TNL/Legacy/fast-sweeping-map/tnlFastSweepingMap_CUDA.h +++ /dev/null @@ -1,196 +0,0 @@ -/*************************************************************************** - tnlFastSweepingMap_CUDA.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING_H_ -#define TNLFASTSWEEPING_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> - -#include <functions/tnlMeshFunction.h> -#include <limits.h> -#include <core/tnlDevice.h> -#include <ctime> - - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweepingMap -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweepingMap(); - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(); - __device__ void updateValue(const Index i, const Index j, Index* something_changed); - __device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweepingMap< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - double* map_cuda; - int counter; - int* changed; - __device__ void setupSquare1000(Index i, Index j); - __device__ void setupSquare1100(Index i, Index j); - __device__ void setupSquare1010(Index i, Index j); - __device__ void setupSquare1001(Index i, Index j); - __device__ void setupSquare1110(Index i, Index j); - __device__ void setupSquare1101(Index i, Index j); - __device__ void setupSquare1011(Index i, Index j); - __device__ void setupSquare1111(Index i, Index j); - __device__ void setupSquare0000(Index i, Index j); - __device__ void setupSquare0100(Index i, Index j); - __device__ void setupSquare0010(Index i, Index j); - __device__ void setupSquare0001(Index i, Index j); - __device__ void setupSquare0110(Index i, Index j); - __device__ void setupSquare0101(Index i, Index j); - __device__ void setupSquare0011(Index i, Index j); - __device__ void setupSquare0111(Index i, Index j); -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector; - DofVectorType data, map; - - - RealType h; - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(int i, int j, int k); - __device__ void updateValue(const Index i, const Index j, const Index k); - __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweepingMap< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector; - DofVectorType data; - - RealType h; - - -}; - - - - - - - -#ifdef HAVE_CUDA -//template<int sweep_t> -__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i, int* changed); -//__global__ void runCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); - -__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -//__global__ void initCUDA(tnlFastSweepingMap< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver); -#endif - -/*various implementtions.... choose one*/ -//#include "tnlFastSweepingMap2D_CUDA_impl.h" -//#include "tnlFastSweepingMap2D_CUDA_v2_impl.h" -//#include "tnlFastSweepingMap2D_CUDA_v3_impl.h" -#include "tnlFastSweepingMap2D_CUDA_v4_impl.h" -//#include "tnlFastSweepingMap2D_CUDA_v5_impl.h" - - -// #include "tnlFastSweepingMap3D_CUDA_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/CMakeLists.txt b/src/TNL/Legacy/fast-sweeping/CMakeLists.txt deleted file mode 100644 index 1a23d646a4..0000000000 --- a/src/TNL/Legacy/fast-sweeping/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -set( tnl_fast_sweeping_SOURCES -# MainBuildConfig.h -# tnlFastSweeping2D_impl.h -# tnlFastSweeping.h -# fastSweepingConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(fast-sweeping main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(fast-sweeping main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (fast-sweeping tnl ) - - -INSTALL( TARGETS fast-sweeping - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_fast_sweeping_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/fast-sweeping ) diff --git a/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h b/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h deleted file mode 100644 index ed3d686eb9..0000000000 --- a/src/TNL/Legacy/fast-sweeping/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include <solvers/tnlBuildConfigTags.h> - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" <<std::endl; } -}; - -/**** - * Turn off support for float and long double. - */ -template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h b/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h deleted file mode 100644 index 3df2c1e889..0000000000 --- a/src/TNL/Legacy/fast-sweeping/fastSweepingConfig.h +++ /dev/null @@ -1,38 +0,0 @@ -/*************************************************************************** - fastSweepingConfig.h - description - ------------------- - begin : Oct 15, 2015 - copyright : (C) 2015 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef FASTSWEEPINGCONFIG_H_ -#define FASTSWEEPINGCONFIG_H_ - -#include <config/tnlConfigDescription.h> - -template< typename ConfigTag > -class fastSweepingConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); - } -}; - -#endif /* FASTSWEEPINGCONFIG_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/main.cpp b/src/TNL/Legacy/fast-sweeping/main.cpp deleted file mode 100644 index 8849008ff6..0000000000 --- a/src/TNL/Legacy/fast-sweeping/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping/main.cu b/src/TNL/Legacy/fast-sweeping/main.cu deleted file mode 100644 index 8849008ff6..0000000000 --- a/src/TNL/Legacy/fast-sweeping/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/fast-sweeping/main.h b/src/TNL/Legacy/fast-sweeping/main.h deleted file mode 100644 index e5ac15fede..0000000000 --- a/src/TNL/Legacy/fast-sweeping/main.h +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - - -#include "MainBuildConfig.h" - //for HOST versions: -#include "tnlFastSweeping.h" - //for DEVICE versions: -//#include "tnlFastSweeping_CUDA.h" -#include "fastSweepingConfig.h" -#include <solvers/tnlBuildConfigTags.h> - -#include <mesh/tnlGrid.h> -#include <core/tnlDevice.h> -#include <time.h> -#include <ctime> - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - fastSweepingConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - tnlFastSweeping<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - TNL_CHECK_CUDA_DEVICE; - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver..." <<std::endl; - solver.run(); - } - else if(dim == 3) - { - tnlFastSweeping<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - TNL_CHECK_CUDA_DEVICE; - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver..." <<std::endl; - solver.run(); - } - else - { - std::cerr << "Unsupported number of dimensions: " << dim << "!" <<std::endl; - return EXIT_FAILURE; - } - - - time(&stop); - std::cout << "Solver stopped..." <<std::endl; - std::cout <<std::endl; - std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl; - return EXIT_SUCCESS; -} - - diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h deleted file mode 100644 index 96d26db7b5..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping.h +++ /dev/null @@ -1,186 +0,0 @@ -/*************************************************************************** - tnlFastSweeping.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING_H_ -#define TNLFASTSWEEPING_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <functions/tnlMeshFunction.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> -#include <limits.h> -#include <core/tnlDevice.h> -#include <ctime> -#ifdef HAVE_OPENMP -#include <omp.h> -#endif - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweeping -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - tnlFastSweeping(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - - void setupSquare1000(Index i, Index j); - void setupSquare1100(Index i, Index j); - void setupSquare1010(Index i, Index j); - void setupSquare1001(Index i, Index j); - void setupSquare1110(Index i, Index j); - void setupSquare1101(Index i, Index j); - void setupSquare1011(Index i, Index j); - void setupSquare1111(Index i, Index j); - void setupSquare0000(Index i, Index j); - void setupSquare0100(Index i, Index j); - void setupSquare0010(Index i, Index j); - void setupSquare0001(Index i, Index j); - void setupSquare0110(Index i, Index j); - void setupSquare0101(Index i, Index j); - void setupSquare0011(Index i, Index j); - void setupSquare0111(Index i, Index j); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweeping(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j, const Index k); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - - tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity; - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - //for single core version use this implementation: -#include "tnlFastSweeping2D_impl.h" - //for parallel version use this one instead: -// #include "tnlFastSweeping2D_openMP_impl.h" - -#include "tnlFastSweeping3D_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h deleted file mode 100644 index bc1da169c0..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_impl.h +++ /dev/null @@ -1,522 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.getSpaceSteps().x(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(32, 32); - dim3 numBlocks(n/32 + 1 ,n/32 +1); - - for(int i = 2*n - 1; i > -1; i--) - { - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - for(int i = 0; i < 2*n ; i++) - { - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - for(int i = 0; i < 2*n ; i++) - { - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - for(int i = 2*n - 1; i > -1; i--) - { - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,i); - cudaDeviceSynchronize(); - } - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - cudaDofVector[index] = fabsMin(value, tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - Real fy = abs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - return; - int total = solver->Mesh.getDimensions().x(); - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - int id1 = gx+gy; - int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - /*---------------------------------------------------------------------------------------------------------------------------*/ - if(sweep == 1) -// for(int i = 0; i < 2*total - 1; i++) - { - if(id1 == i) - { - solver->updateValue(gx,gy); - return; - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - else if(sweep == 2) -// for(int i = 0; i < 2*total - 1; i++) - { - if(id2 == i) - { - solver->updateValue(gx,gy); - return; - } - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - else if(sweep == 3) -// for(int i = 2*total - 2; i > -1; i--) - { - if(id1 == i) - { - solver->updateValue(gx,gy); - return; - } - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - else if(sweep == 4) -// for(int i = 2*total - 2; i > -1; i--) - { - if(id2 == i) - { - solver->updateValue(gx,gy); - return; - } - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h deleted file mode 100644 index 3ad5b7944f..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v2_impl.h +++ /dev/null @@ -1,588 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v2_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.getSpaceSteps().x(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(27, 27); - dim3 numBlocks(1 ,1); - -// for(int i = 2*n - 1; i > -1; i--) - { - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,0); - cudaDeviceSynchronize(); - } - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -//// for(int i = 0; i < 2*n ; i++) -// { -// runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,0); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -//// for(int i = 0; i < 2*n ; i++) -// { -// runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,0); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -//// for(int i = 2*n - 1; i > -1; i--) -// { -// runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,3,0); -// cudaDeviceSynchronize(); -// } -// -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - cudaDofVector[index] = fabsMin(value, tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - - Real tmpMin = Min(fx,abs(y)); - - if(tmpMin == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) -{ - - //int gx = threadIdx.x; - //int gy = threadIdx.y; - int id1,id2; - int nx = solver->Mesh.getDimensions().x()+ threadIdx.x; - int ny = solver->Mesh.getDimensions().y()+ threadIdx.y; - - int blockCount = solver->Mesh.getDimensions().x()/blockDim.x + 1; - - for(int gy = threadIdx.y; gy < ny;gy+=blockDim.y) - { - for(int gx = threadIdx.x; gx < nx;gx+=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - - } - //gx+=blockDim.x; - //__syncthreads(); - } - //gx = threadIdx.x; - //gy+=blockDim.y; - //__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ -// gx = blockDim.x*(blockCount-1) + threadIdx.x; -// gy = threadIdx.y; - for(int gy = threadIdx.y; gy < ny;gy+=blockDim.y) - { - for(int gx = blockDim.x*(blockCount-1) + threadIdx.x; gx >- 1;gx-=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - } - //gx-=blockDim.x; - //__syncthreads(); - } - //gx = blockDim.x*(blockCount-1) + threadIdx.x; - //gy+=blockDim.y; - //__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ -// gx = blockDim.x*(blockCount-1) + threadIdx.x; -// gy = blockDim.x*(blockCount-1) + threadIdx.y; - for(int gy = blockDim.x*(blockCount-1) +threadIdx.y; gy >- 1;gy-=blockDim.y) - { - for(int gx = blockDim.x*(blockCount-1) + threadIdx.x; gx >- 1;gx-=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - } - //gx-=blockDim.x; - //__syncthreads(); - } - //gx = blockDim.x*(blockCount-1) + threadIdx.x; - //gy-=blockDim.y; - //__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - //gx = threadIdx.x; - //gy = blockDim.x*(blockCount-1) +threadIdx.y; - for(int gy = blockDim.x*(blockCount-1) +threadIdx.y; gy >- 1;gy-=blockDim.y) - { - for(int gx = threadIdx.x; gx < nx;gx+=blockDim.x) - { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && gy > -1&& gx > -1) - { - id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - } - //gx+=blockDim.x; - //__syncthreads(); - } - //gx = threadIdx.x; - //gy-=blockDim.y; - ///__syncthreads(); - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h deleted file mode 100644 index ff36d3f8e0..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v3_impl.h +++ /dev/null @@ -1,920 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v3_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - - - - -__device__ double atomicSet(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong(val )); - } while (assumed != old); - return __longlong_as_double(old); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.getSpaceSteps().x(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 +1 ,n/16 +1); - int m =n/16 +1; - - for(int i = 0; i < 2*m -1; i++) - { - runCUDA<15><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i); - //cudaDeviceSynchronize(); - } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -// for(int i = 0; i < 2*m -1; i++) -// { -// runCUDA<2><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -// for(int i = 0; i < 2*m -1; i++) -// { -// runCUDA<4><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i); -// cudaDeviceSynchronize(); -// } -// cudaDeviceSynchronize(); -// TNL_CHECK_CUDA_DEVICE; -// for(int i = 0; i < 2*m -1; i++) -// { -// runCUDA<8><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,8,i); -// cudaDeviceSynchronize(); -// } - - - - -// for(int i = 0; i < (2*m -1)/4 -1; i++) -// { -// runCUDA<15><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,15,i);//all -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1)/4 -1; i < (2*m -1)/2 -1; i++) -// { -// runCUDA<5><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,5,i); //two -// cudaDeviceSynchronize(); -// runCUDA<10><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,10,i); //two -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1)/2 -1; i < (2*m -1)/2 +1; i++) -// { -// runCUDA<1><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,1,i); //separate -// cudaDeviceSynchronize(); -// runCUDA<2><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,2,i); //separate -// cudaDeviceSynchronize(); -// runCUDA<4><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,4,i); //separate -// cudaDeviceSynchronize(); -// runCUDA<8><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,8,i); //separate -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1)/2 +1; i < (2*m -1/4)*3 +1; i++) -// { -// runCUDA<5><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,5,i); //two -// cudaDeviceSynchronize(); -// runCUDA<10><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,10,i); //two -// cudaDeviceSynchronize(); -// } -// for(int i = (2*m -1/4)*3 +1; i < 2*m -1; i++) -// { -// runCUDA<15><<<numBlocks,threadsPerBlock>>>(this->cudaSolver,15,i);//all -// cudaDeviceSynchronize(); -// } -cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - atomicSet(&cudaDofVector[index],fabsMin(value, tmp)); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ -// Real fx = abs(x); -// -// Real tmpMin = Min(fx,abs(y)); - - if(abs(y) > abs(x)) - return x; - else - return y; - - -} - - -template<> -__global__ void runCUDA<1>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) -{ - - if(blockIdx.x+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ -} - template<> - __global__ void runCUDA<2>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - } - __syncthreads(); - } - - } - } /*---------------------------------------------------------------------------------------------------------------------------*/ - template<> - __global__ void runCUDA<4>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - } - - template<> - __global__ void runCUDA<8>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - -} - - - template<> - __global__ void runCUDA<5>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - - if(blockIdx.x+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - else if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - } - - - template<> - __global__ void runCUDA<10>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - - else if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - - } - - - - template<> - __global__ void runCUDA<15>(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) - { - - if(blockIdx.x+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == k) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 0; l < 2*blockDim.x - 1; l++) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - if(blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id1 = threadIdx.x+threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id1 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - if((gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) - { - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = threadIdx.y + blockDim.y*blockIdx.y; - - int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; - - for(int l = 2*blockDim.x - 2; l > -1; l--) - { - if(id2 == l) - { - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) - solver->updateValue(gx,gy); - return; - } - __syncthreads(); - } - - } - /*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - - } - - - - - - - - - - - - - - - - - - - - - - - - - - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - - - -//__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int k) -//{ -// -// if(sweep==1 && blockIdx.x+blockIdx.y == k) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id1 = threadIdx.x+threadIdx.y; -// -// for(int l = 0; l < 2*blockDim.x - 1; l++) -// { -// if(id1 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// else if(sweep==2 && (gridDim.x - blockIdx.x - 1)+blockIdx.y == k) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; -// -// for(int l = 0; l < 2*blockDim.x - 1; l++) -// { -// if(id2 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// else if(sweep==4 && blockIdx.x+blockIdx.y == gridDim.x+gridDim.y-k-2) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id1 = threadIdx.x+threadIdx.y; -// -// for(int l = 2*blockDim.x - 2; l > -1; l--) -// { -// if(id1 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// return; -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// else if(sweep==8 && (gridDim.x - blockIdx.x - 1)+blockIdx.y == gridDim.x+gridDim.y-k-2) -// { -// int gx = threadIdx.x + blockDim.x*blockIdx.x; -// int gy = threadIdx.y + blockDim.y*blockIdx.y; -// -// int id2 = (blockDim.x - threadIdx.x - 1) + threadIdx.y; -// -// for(int l = 2*blockDim.x - 2; l > -1; l--) -// { -// if(id2 == l) -// { -// if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy /*&& gy > -1&& gx > -1*/) -// solver->updateValue(gx,gy); -// return; -// } -// __syncthreads(); -// } -// -// } -// /*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// -// -// -//} - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h deleted file mode 100644 index e0a9697c2e..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v4_impl.h +++ /dev/null @@ -1,1003 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping() -:dofVector(Mesh) -{ -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 1024); - dim3 numBlocks(4,1); - - - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - -// cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); -// -// if(abs(cudaDofVector[gid]) < 1.01*h) -// cudaDofVector2[gid] = cudaDofVector[gid]; - - - - - - if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() && !exactInput) - { - if(cudaDofVector[Entity.getIndex()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - setupSquare0100(i,j); - } - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } - if(exactInput) - { - if(abs(cudaDofVector[gid]) < 1.5*h) - cudaDofVector2[gid] = cudaDofVector[gid]; - } - - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); -// Entity.setCoordinates(CoordinatesType(i,j)); -// Entity.refresh(); -// tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); -// cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); -// Entity.setCoordinates(CoordinatesType(i,j)); -// Entity.refresh(); -// tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); -// cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=INT_MAX; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=INT_MAX; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-INT_MAX; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=INT_MAX; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-INT_MAX; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=0.5*h; //fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b = 1.0; - c = -be; - s = h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=-0.5*h; //fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=0.5*h; //fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=-0.5*h; //fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=-0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=0.5*h; //fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h deleted file mode 100644 index 1591bb6137..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_CUDA_v5_impl.h +++ /dev/null @@ -1,697 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v5_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.getSpaceSteps().x(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData(), this->dofVector.getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1); - - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ -// -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = 0; j < Mesh.getDimensions().y(); j++) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// for(Index i = 0; i < Mesh.getDimensions().x(); i++) -// { -// for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) -// { -// updateValue(i,j); -// } -// } -// -///*---------------------------------------------------------------------------------------------------------------------------*/ -// -// -// dofVector.save("u-00001.tnl"); - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 512); - dim3 numBlocks(4,1); - - - runCUDA<<<numBlocks,threadsPerBlock,3*(512+1)*sizeof(double)>>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData(), cudaDofVector, this->dofVector.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = cudaDofVector[index]; - Real a,b, tmp; - - if( i == 0 ) - a = cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<-1,0>(index)], - cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( cudaDofVector[Mesh.template getCellNextToCell<0,-1>(index)], - cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - cudaDofVector[index] = fabsMin(value, tmp); - -} - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, double** sharedMem, int k3) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = sharedMem[k3+1][threadIdx.y]; - Real a,b, tmp; - - if( i == 0 ) - a = sharedMem[k3][threadIdx.y]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = sharedMem[k3+2][threadIdx.y]; - else - { - a = fabsMin( sharedMem[k3][threadIdx.y], - sharedMem[k3+2][threadIdx.y] ); - } - - if( j == 0 ) - b = sharedMem[k3][threadIdx.y+1]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = sharedMem[k3+2][threadIdx.y-1]; - else - { - b = fabsMin( sharedMem[k3][threadIdx.y+1], - sharedMem[k3+2][threadIdx.y-1] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - -// sharedMem[k3+1][threadIdx.y] = this->fabsMin(value, tmp); -// atomicFabsMin(&(cudaDofVector[index]), tmp); - cudaDofVector[index] = tmp; - this->fabsMin(value, tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gid = Mesh.getCellIndex(CoordinatesType(gx,gy)); - - int total = blockDim.x*gridDim.x; - - - - Real tmp = 0.0; - int flag = 0; - counter = 0; - tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - - - if(!exactInput) - { - cudaDofVector[gid]=cudaDofVector[gid]=0.5*h*sign(cudaDofVector[gid]); - } - __threadfence(); -// printf("-----------------------------------------------------------------------------------\n"); - - __threadfence(); - - if(gx > 0 && gx < Mesh.getDimensions().x()-1) - { - if(gy > 0 && gy < Mesh.getDimensions().y()-1) - { - - Index j = gy; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag=1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); -// printf("****************************************************************\n"); -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == 0) - { -// printf("gx: %d, gy: %d, gid: %d \n", gx, gy,gid); - Index j = 0; - Index i = gx; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - if(gx > 0 && gx < Mesh.getDimensions().x()-1 && gy == Mesh.getDimensions().y() - 1) - { - Index i = gx; - Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == 0) - { - Index j = gy; - Index i = 0; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } -// printf("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); - if(gy > 0 && gy < Mesh.getDimensions().y()-1 && gx == Mesh.getDimensions().x() - 1) - { - Index j = gy; - Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - -// printf("##################################################################################################\n"); - if(gx == Mesh.getDimensions().x() - 1 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == Mesh.getDimensions().x() - 1 && - gy == 0) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx-1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } -// printf("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); - if(gx == 0 && - gy == Mesh.getDimensions().y() - 1) - { - -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy-1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - if(gx == 0 && - gy == 0) - { -// tmp = sign(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))]); - if(cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx+1,gy))]*tmp > 0.0 && - cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy+1))]*tmp > 0.0) - - flag = 1;//cudaDofVector[Mesh.getCellIndex(CoordinatesType(gx,gy))] = tmp*INT_MAX; - } - - __threadfence(); - - if(flag==1) - cudaDofVector[gid] = tmp*3; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - extern __shared__ double u[]; - double* sharedMem[5]; - sharedMem[0] = u; - sharedMem[1] = &(u[blockDim.y+1]); - sharedMem[2] = &(sharedMem[1][blockDim.y+1]); - sharedMem[3] = sharedMem[1]; - sharedMem[4] = sharedMem[2]; - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - - if(blockIdx.x==0) - { - if(threadIdx.y==0) - sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,0))]; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - int k3=k%3; - - if(threadIdx.y==0) - { - if(gx==n-1) - sharedMem[k3][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,gy+blockDim.y))]; - else - sharedMem[k3][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))]; - } -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3+2][threadIdx.y-1]; - - if(gy<n-1) - sharedMem[k3][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; - - solver->updateValue(gx,gy,sharedMem,k3); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } -// else if(blockIdx.x==1) -// { -// gx=n-1; -// gy=threadIdx.y; -// -// if(threadIdx.y==0) -// sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,0))]; -// -// for(int k = 0; k < n*blockCount + blockDim.y; k++) -// { -// if(threadIdx.y < k+1 && gy < n) -// { -// int k3=k%3; -// -// if(threadIdx.y==0) -// if(gx==0) -// sharedMem[k3+2][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,gy+blockDim.y))]; -// else -// sharedMem[k3+2][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx-1,gy))]; -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3][threadIdx.y-1]; -// -// if(gy<n-1) -// sharedMem[k3+2][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; -// -// -// solver->updateValue(gx,gy,sharedMem,k3); -// gx--; -// if(gx==-1) -// { -// gx=n-1; -// gy+=blockDim.y; -// } -// } -// -// -// __syncthreads(); -// } -// } -// else if(blockIdx.x==2) -// { -// gx=0; -// gy=n-blockDim.y-1+threadIdx.y; -// -// if(threadIdx.y==0) -// sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,n-1))]; -// -// for(int k = 0; k < n*blockCount + blockDim.y; k++) -// { -// if(blockDim.y-threadIdx.y < k+1 && gy > -1) -// { -// int k3=k%3; -// -// if(threadIdx.y==blockDim.y-1) -// if(gx==n-1) -// sharedMem[k3][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(0,gy-blockDim.y))]; -// else -// sharedMem[k3][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))]; -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3+2][threadIdx.y-1]; -// -// if(gy<n-1) -// sharedMem[k3][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; -// -// -// solver->updateValue(gx,gy,sharedMem,k3); -// gx++; -// if(gx==n) -// { -// gx=0; -// gy-=blockDim.y; -// } -// } -// -// -// __syncthreads(); -// } -// } -// else if(blockIdx.x==3) -// { -// gx=n-1; -// gy=n-blockDim.y-1+threadIdx.y; -// -// if(threadIdx.y==0) -// sharedMem[1][0]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,n-1))]; -// -// -// for(int k = 0; k < n*blockCount + blockDim.y; k++) -// { -// if(blockDim.y-threadIdx.y < k+1 && gy > -1) -// { -// int k3=k%3; -// -// if(threadIdx.y==blockDim.y-1) -// if(gx==n-1) -// sharedMem[k3+2][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(n-1,gy-blockDim.y))]; -// else -// sharedMem[k3+2][n-1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx+1,gy))]; -// else -// solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy-1))]=sharedMem[k3][threadIdx.y-1]; -// -// if(gy<n-1) -// sharedMem[k3+2][threadIdx.y+1]=solver->cudaDofVector[solver->Mesh.getCellIndex(Containers::StaticVector<2,int>(gx,gy+1))]; -// -// -// solver->updateValue(gx,gy,sharedMem,k3); -// gx--; -// if(gx==-1) -// { -// gx=n-1; -// gy-=blockDim.y; -// } -// } -// -// -// __syncthreads(); -// } -// } - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h deleted file mode 100644 index c4ce8fe6b2..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_impl.h +++ /dev/null @@ -1,927 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - dofVector2.load(initialCondition); - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - cout << "a" <<std::endl; - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) - { - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) - { - for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) - { - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - neighborEntities.refresh(Mesh,Entity.getIndex()); - - if(dofVector[this->Entity.getIndex()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - setupSquare0100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } - } - cout << "a" <<std::endl; - -// Real tmp = 0.0; -// Real ax=0.5/sqrt(2.0); -// -// if(!exactInput) -// { -// for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) -// dofVector[i]=0.5*h*sign(dofVector[i]); -// } -// -// -// for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// } -// -// -// -// for(int i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// Index j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// Index i = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// -// Index i = Mesh.getDimensions().x() - 1; -// Index j = Mesh.getDimensions().y() - 1; -// -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// i = 0; -// j = Mesh.getDimensions().y() -1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - //data.setLike(dofVector2.getData()); - //data=dofVector2.getData(); - //cout << data.getType() <<std::endl; - dofVector2.save("u-00000.tnl"); - //dofVector2.getData().save("u-00000.tnl"); - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - -// data.setLike(dofVector2.getData()); -// data = dofVector2.getData(); -// cout << data.getType() <<std::endl; - dofVector2.save("u-00001.tnl"); - //dofVector2.getData().save("u-00001.tnl"); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - Real value = dofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); - -// if(dofVector2[Entity.getIndex()] > 1.0) -// cout << value << " " << tmp << " " << dofVector2[Entity.getIndex()] <<std::endl; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h deleted file mode 100644 index 54bbe931e0..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping2D_openMP_impl.h +++ /dev/null @@ -1,399 +0,0 @@ -/*************************************************************************** - tnlFastSweeping_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING2D_IMPL_H_ -#define TNLFASTSWEEPING2D_IMPL_H_ - -#include "tnlFastSweeping.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.getSpaceSteps().x(); - - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - -#ifdef HAVE_OPENMP -// gridLock = (omp_lock_t*) malloc(sizeof(omp_lock_t)*Mesh.getDimensions().x()*Mesh.getDimensions().y()); -// -// for(int i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) -// omp_init_lock(&gridLock[i]); -#endif - - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - Real tmp = 0.0; - - if(!exactInput) - { - for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) - dofVector[i]=0.5*h*sign(dofVector[i]); - } - - - for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) - { - for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) - { - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - } - - - - for(int i = 1; i < Mesh.getDimensions().x()-1; i++) - { - Index j = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - for(int i = 1; i < Mesh.getDimensions().x()-1; i++) - { - Index j = Mesh.getDimensions().y() - 1; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - for(int j = 1; j < Mesh.getDimensions().y()-1; j++) - { - Index i = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - for(int j = 1; j < Mesh.getDimensions().y()-1; j++) - { - Index i = Mesh.getDimensions().x() - 1; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - - - if(tmp == 0.0) - {} - else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) - {} - else - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - } - - - Index i = Mesh.getDimensions().x() - 1; - Index j = Mesh.getDimensions().y() - 1; - - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - - j = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - - i = 0; - j = Mesh.getDimensions().y() -1; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - - j = 0; - tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); - if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && - dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) - - dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - - dofVector.save("u-00000.tnl"); - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - DofVectorType d2,d3,d4; - d2.setLike(dofVector); - d2=dofVector; - d3.setLike(dofVector); - d3=dofVector; - d4.setLike(dofVector); - d4=dofVector; - - -#ifdef HAVE_OPENMP -#pragma omp parallel sections num_threads(4) - { - { -#endif - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,&dofVector); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } -#pragma omp section - { -#endif - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,&d2); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } -#pragma omp section - { -#endif - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j, &d3); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } -#pragma omp section - { -#endif - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j, &d4); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ -#ifdef HAVE_OPENMP - } - } -#endif - - -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - int index = Mesh.getCellIndex(CoordinatesType(i,j)); - dofVector[index] = fabsMin(dofVector[index], d2[index]); - dofVector[index] = fabsMin(dofVector[index], d3[index]); - dofVector[index] = fabsMin(dofVector[index], d4[index]); - } - } - - dofVector.save("u-00001.tnl"); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, DofVectorType* grid) -{ - Index index = Mesh.getCellIndex(CoordinatesType(i,j)); - Real value = (*grid)[index]; - Real a,b, tmp; - - if( i == 0 ) - a = (*grid)[Mesh.template getCellNextToCell<1,0>(index)]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = (*grid)[Mesh.template getCellNextToCell<-1,0>(index)]; - else - { - a = fabsMin( (*grid)[Mesh.template getCellNextToCell<-1,0>(index)], - (*grid)[Mesh.template getCellNextToCell<1,0>(index)] ); - } - - if( j == 0 ) - b = (*grid)[Mesh.template getCellNextToCell<0,1>(index)]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = (*grid)[Mesh.template getCellNextToCell<0,-1>(index)]; - else - { - b = fabsMin( (*grid)[Mesh.template getCellNextToCell<0,-1>(index)], - (*grid)[Mesh.template getCellNextToCell<0,1>(index)] ); - } - - - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - -#ifdef HAVE_OPENMP -// omp_set_lock(&gridLock[index]); -#endif - (*grid)[index] = fabsMin(value, tmp); -#ifdef HAVE_OPENMP -// omp_unset_lock(&gridLock[index]); -#endif -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - - -} - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h deleted file mode 100644 index 6a5195cfe4..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_CUDA_impl.h +++ /dev/null @@ -1,961 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING3D_IMPL_H_ -#define TNLFASTSWEEPING3D_IMPL_H_ - -#include "tnlFastSweeping.h" - -//__device__ -//double fabsMin( double x, double y) -//{ -// double fx = abs(x); -// -// if(Min(fx,abs(y)) == fx) -// return x; -// else -// return y; -//} -// -//__device__ -//double atomicFabsMin(double* address, double val) -//{ -// unsigned long long int* address_as_ull = -// (unsigned long long int*)address; -// unsigned long long int old = *address_as_ull, assumed; -// do { -// assumed = old; -// old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); -// } while (assumed != old); -// return __longlong_as_double(old); -//} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - this->h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(8, 8,8); - dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 1024); - dim3 numBlocks(8,1); - - - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()] ); - } - - if( k == 0 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()]; - else - { - c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - atomicFabsMin(&cudaDofVector2[Entity.getIndex()],tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - int gid = Entity.getIndex(); - - if(abs(cudaDofVector[gid]) < 1.0*h) - cudaDofVector2[gid] = 0.5*h;//cudaDofVector[gid]; - else - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = 0; - int gy = threadIdx.y; - - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - - if(blockIdx.x==0) - { - for(int gz = 0; gz < n;gz++) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==2) - { - - for(int gz = 0; gz < n;gz++) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==3) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - - else if(blockIdx.x==4) - { - for(int gz = n-1; gz > -1;gz--) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==5) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==6) - { - - for(int gz = n-1; gz > -1;gz--) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==7) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - -} - - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gz = blockDim.z*blockIdx.z + threadIdx.z; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz) - { - solver->initGrid(gx,gy,gz); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -//} -#endif - - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h deleted file mode 100644 index e22de0ab85..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping3D_impl.h +++ /dev/null @@ -1,307 +0,0 @@ -/*************************************************************************** - tnlFastSweeping2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING3D_IMPL_H_ -#define TNLFASTSWEEPING3D_IMPL_H_ - -#include "tnlFastSweeping.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlFastSweeping< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlFastSweeping() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - dofVector2.load(initialCondition); - - h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; -// cout << "bla "<<endl; - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++) - { - - if (abs(dofVector[i]) < 1.8*h) - dofVector2[i]=dofVector[i]; - else - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - - - - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - - dofVector2.save("u-00001.tnl"); - - cout << "bla 3"<<endl; - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - this->Entity.setCoordinates(CoordinatesType(i,j,k)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity); - Real value = dofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0, 0>()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0, 0>()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()] ); - } - - if( k == 0 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()]; - else - { - c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -#endif /* TNLFASTSWEEPING_IMPL_H_ */ diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h deleted file mode 100644 index fc9eb54598..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweepingSolver.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * File: tnlFastSweepingSolver.h - * Author: oberhuber - * - * Created on July 12, 2016, 6:04 PM - */ - -#pragma once - -#include <functions/tnlConstantFunction.h> -#include <problems/tnlPDEProblem.h> - -template< typename Mesh, - typename Communicator, - typename Anisotropy = tnlConstanstFunction< Mesh > > -class tnlFastSweepingSolver : public tnlPDEProblem< Mesh, - Communicator, - typename Mesh::RealType, - typename Mesh::DeviceType, - typename Mesh::IndexType > -{ - public: - - typedef typename DifferentialOperator::RealType RealType; - typedef typename Mesh::DeviceType DeviceType; - typedef typename DifferentialOperator::IndexType IndexType; - - typedef tnlMeshFunction< Mesh > MeshFunctionType; - typedef tnlPDEProblem< Mesh, TimeDependentProblem, RealType, DeviceType, IndexType > BaseType; - - using typename BaseType::MeshType; - using typename BaseType::DofVectorType; - using typename BaseType::MeshDependentDataType; -}; - - diff --git a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h b/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h deleted file mode 100644 index f531da431b..0000000000 --- a/src/TNL/Legacy/fast-sweeping/tnlFastSweeping_CUDA.h +++ /dev/null @@ -1,194 +0,0 @@ -/*************************************************************************** - tnlFastSweeping_CUDA.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLFASTSWEEPING_H_ -#define TNLFASTSWEEPING_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> - -#include <functions/tnlMeshFunction.h> -#include <limits.h> -#include <core/tnlDevice.h> -#include <ctime> - - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlFastSweeping -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlFastSweeping(); - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(); - __device__ void updateValue(const Index i, const Index j); - __device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweeping< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; - __device__ void setupSquare1000(Index i, Index j); - __device__ void setupSquare1100(Index i, Index j); - __device__ void setupSquare1010(Index i, Index j); - __device__ void setupSquare1001(Index i, Index j); - __device__ void setupSquare1110(Index i, Index j); - __device__ void setupSquare1101(Index i, Index j); - __device__ void setupSquare1011(Index i, Index j); - __device__ void setupSquare1111(Index i, Index j); - __device__ void setupSquare0000(Index i, Index j); - __device__ void setupSquare0100(Index i, Index j); - __device__ void setupSquare0010(Index i, Index j); - __device__ void setupSquare0001(Index i, Index j); - __device__ void setupSquare0110(Index i, Index j); - __device__ void setupSquare0101(Index i, Index j); - __device__ void setupSquare0011(Index i, Index j); - __device__ void setupSquare0111(Index i, Index j); -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector; - DofVectorType data; - - - RealType h; - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - - __host__ static String getType(); - __host__ bool init( const Config::ParameterContainer& parameters ); - __host__ bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(int i, int j, int k); - __device__ void updateValue(const Index i, const Index j, const Index k); - __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlFastSweeping< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector; - DofVectorType data; - - RealType h; - - -}; - - - - - - - -#ifdef HAVE_CUDA -//template<int sweep_t> -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); -__global__ void runCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); - -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void initCUDA(tnlFastSweeping< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver); -#endif - -/*various implementtions.... choose one*/ -//#include "tnlFastSweeping2D_CUDA_impl.h" -//#include "tnlFastSweeping2D_CUDA_v2_impl.h" -//#include "tnlFastSweeping2D_CUDA_v3_impl.h" -#include "tnlFastSweeping2D_CUDA_v4_impl.h" -//#include "tnlFastSweeping2D_CUDA_v5_impl.h" - - -#include "tnlFastSweeping3D_CUDA_impl.h" - -#endif /* TNLFASTSWEEPING_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt deleted file mode 100644 index 48382df82d..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set( tnl_hamilton_jacobi_parallel_map_SOURCES -# MainBuildConfig.h -# tnlParallelMapSolver2D_impl.h -# tnlParallelMapSolver.h -# parallelMapConfig.h -# main.cu - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel-map main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(hamilton-jacobi-parallel-map main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (hamilton-jacobi-parallel-map tnl ) - - -INSTALL( TARGETS hamilton-jacobi-parallel-map - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_map_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/hamilton-jacobi-parallel-map ) diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h deleted file mode 100644 index ed3d686eb9..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include <solvers/tnlBuildConfigTags.h> - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" <<std::endl; } -}; - -/**** - * Turn off support for float and long double. - */ -template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt b/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt deleted file mode 100644 index d4ae619839..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/gnuplot.txt +++ /dev/null @@ -1,32 +0,0 @@ -tomas@tomas-linux:~/Desktop/VU_CPU_MAPA/work_dir$ gnuplot - - G N U P L O T - Version 4.6 patchlevel 4 last modified 2013-10-02 - Build System: Linux x86_64 - - Copyright (C) 1986-1993, 1998, 2004, 2007-2013 - Thomas Williams, Colin Kelley and many others - - gnuplot home: http://www.gnuplot.info - faq, bugs, etc: type "help FAQ" - immediate help: type "help" (plot window: hit 'h') - -Terminal type set to 'wxt' -gnuplot> set cntrparam levels 15 -gnuplot> set cntrparam bspline -gnuplot> set contour -gnuplot> splot 'u-00001.gplt' - -gnuplot> unset surface -gnuplot> splot 'u-00001.gplt' - -gnuplot> set table "test.gplt" -gnuplot> splot 'u-00001.gplt' -gnuplot> unset table - -gnuplot> set table "test2.gplt" -gnuplot> plot 'test.gplt' index 10 -gnuplot> unset table - -gnuplot> plot 'test2.gplt' - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp deleted file mode 100644 index b13498e173..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Jul 8 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu deleted file mode 100644 index 7101976712..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cu - description - ------------------- - begin : Mar 30 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h deleted file mode 100644 index fff21c77eb..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/main.h +++ /dev/null @@ -1,98 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "tnlParallelMapSolver.h" -#include "parallelMapConfig.h" -#include "MainBuildConfig.h" -#include <solvers/tnlBuildConfigTags.h> -#include <operators/hamilton-jacobi/godunov-eikonal/parallelGodunovMap.h> -#include <mesh/tnlGrid.h> -#include <core/tnlDevice.h> -#include <time.h> -#include <ctime> - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - parallelMapConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - - tnlDeviceEnum device; - device = TNL::Devices::HostDevice; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - - typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost; -/*#ifdef HAVE_CUDA - typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; -#endif -#ifndef HAVE_CUDA*/ - typedef parallelGodunovMapScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice; -/*#endif*/ - - if(device==TNL::Devices::HostDevice) - { - typedef TNL::Devices::Host Device; - - - tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver loop..." <<std::endl; - solver.run(); - } - else if(device==tnlCudaDevice ) - { - typedef tnlCuda Device; -//typedef parallelGodunovMapScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType; - - tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver loop..." <<std::endl; - solver.run(); - } - } - - - time(&stop); - cout <<std::endl; - cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl; - return EXIT_SUCCESS; -} - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png b/src/TNL/Legacy/hamilton-jacobi-parallel-map/mapa_png.png deleted file mode 100644 index 668b6fe24b17b2fec486db28505b41e3beb2091a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24841 zcmX_o2RzmLAGdLE=oslFAx?GTByo~)vMH-%MD{Ez*&%y|A`~HHXD2Ir=gQ?Idy|#D zlD+bL-2d}D=k>a;>s9$V=Q}>1_xgCHsw_`S!$LzsLPD#kaOVLD38~b{e@HU;6PZ8! zNAN%LXEKT!NF;J(Qe}dK1Vf^DM_R-6!?Kr-HS3<&vHpk2H@E3j-d>Vb?7dEG*?&2p zXQ%m-U{gigog&4EzTYkOg<=?E>6eBJp}Zbh_r*>&Kx`}L-+Q5HPmb?*ak73YNmv;+ z<R~`N=~qmw`6uDSdBfX3z8_DHn4#~*;F6lAHwv{!dcSL7B{z=#*0s!qdhb+^%T`h3 zXe?4m?lvwJ)O$~V3v(Iimd{h;j$R$DEGaCspZprg=Q()_7g9EoR)tY~P^8Vn6dJ)U zK0H3&G2=M!*}&syYhcWFXL)(KFHg%^<bpruPW6g4)=A_-kV%*2yJ<pt*P!jQeaowO zRL_DFOBiQDI};5>eCCLant`^i?(pz%_tMt%^t4uip|eP$(ceF@T%4SS?mAkh-(y=F z8yi<vY$95oEp%T&H4WRSapaQ+nW(F)zg568qkh(U?BtjKyrft>CYMMWiES;esjaQ8 ztc)Yh2(X0pEYx@%Yzx<|)24T2cUcOsw09*ha+9Uw+z+-_j&~!D|25?==GmbhRhLrh zwJjG-`~lyr!F?s<5!Gn|OW5l2^2eS9YoVw7aS}%hD#tTMe>3{5#J8xbLmH6&#>{d( zPFTiQEz>U57h2X`^m^r&NRlPOr<)%YX>ga0eCT<5H{;OTM<kIhP$mtho8Qmpy&h_` z-yJI_Cx@cEEvPjwM5c}w6cG4QTwGjQYQ`V8k668$f)f!HWeshH@5D3rE^RG3eQkbp zgFS-WV|u-UhbU}2y4zn~H?!q_KFCi)IXZSxi0o<UaAjqsr9j(;XO2d<Rq0Auc0`O! z!NADMc4=v8b8|DSp;cOEj5NC0l)rVu6Neri8j@CXvbA09_`BTk*|2uevC_(;--*@l zG~yysCaY&=V=o+;wr@#jSPx0lhJSB!${+JG4Y%iM$nZiHw>i~(Dtubvc(21}BVpWB zr+>yVEIeFBI$Ox;qF<?Q{=kxfuCDv~lx&86zx-Om(w1SJr`zMlw;5gt&GMg>JuM3h zl+m*g7#OI{<8rjWC3(~%d4Zk1&T%HBY0~mvgQ_<TSjF(yudVIuf@M-S<~m@Do?;Zc zP5GnmX7u$v%<Ho%8Pc_u%F*Cv3T0~%_xbB-XV)=l|GlGwo|d+~qr<4yy|id&QHJa? zGOf$fGCsWd(Tro|SlMQmRSJ&AfGt%Om#AF+zG233uQ~Lbo}S)0KEoQPg>F5&+O6#o zyW`=4Bi$(5fkebNhf!1h>wJ92%NCLce`4!L`H(wkRkR>W7+vsVA0Nr&4&eirkTiwJ zcXu<G&Yimzp8iBPz3Y2>`_b6=(FJ00fmMSgx+<yx7ox1Bgb0A6yy7h{Pyf}b)Ue)L z98Dv8>FxUZ`s{^tAt53DxYrn@NTRaZyVTUwHJ{@H!-8_fa?y}gEZ!f3Y8tdIgWZJF zD8M3GTUr}Y;-7|NYw?muZkdjYkGEPrhhd}RVx*R4U}9owoB7kw&|q}DWn@(4^vX;p zt7oWzH8JG#6?{k<53yU|=;W`Gj}PbWbE+pdiBlvh3*Wd=^ySMt<t#HDTZMZn+h@eQ z_64F^YmT-@j{mM5udch|LW*_s5nYr*&e!3Xoaf+(XnvHCkT7H&V>Tq==Rt#(HK=rK zp4$j)G6|plZvL!2w#~dmtYglxvL%A!_LJ!ypW~gDmKF<h^Pu41k&%^wc0`aPSx8!? zK73-AgdX|4)GkY02prAx7cQ9Tlm%(L#3~M1Zx5ANNF>`C8&BMWyYuVUug%R(*t3V- zABWlzat!}i3OqnN+uP&ucw_#!VH;(R>lEnAu<1NZ#JLTT#Lw|KrAVwA`ii2io}S0{ zavUz?%$YMr$NK~0uG3)&!geeu%7=lD2Fep}-@d(bsi?U4QIYn(no|L3+pRbyb7$v1 zYoX*tp|i4AnL@qmeVm;I<CGSis)WZqpLu#7&bBAm)$L^(7TCqj^raK2U#Ddo7Raj! zB(#@}#7OAUqMDXns)ww@Tc*W2?nOF%4L$SfpAw6Xsr3pB64f+X<-Bw=dGEXET1|Sc z!^`w}N3zSw^UDY_zoMcW)nh;ZtC>*7x-7BCOHr++{O9vEJqR>WW^vOM?d|OW0ReX| zT@0BT92{h|t8rNB&+qWwF44?;cr=|oreYjp1~s{@t?ln$@8pJB%8;}liK4MeoaN=^ z&LU&^dgX9t&2F(rPz5yV^TsM*9~HeHS*deb84hC=_MUt0oz)yTD$;d?5P~w=(=#$M za)hg*@-ungWofEYrqBC~?cA$7G=XHU2KD0|_}3#Ve@EvecB<<Ss&C%B*||4p8`TlC z$_2+wBrzP@nwXdf%L}iQDtS+u;l+?Or=p{WhbSA}l9LFWZpx<dzboV3GuMou{=oGv z9j03;U-3S<zdZ}QMs<hd$4BEH9v)3iO-K4pYf=m^G`K0;pR=;ELf`ONKj$;)SUGB2 zKC-gvw4n1*9;w9e5459$wR$5Qx^2d>B~a0;*Y`oryH-=f8rPztqPHD$(uo0PL*s|- z;~ExFxHb`RsHN53nY_=>&xd;XZ0<*nWc8pt?Hg>4D(+=k1ytC=LX0A#5;80->;`IM zsyS4HJ6bvG0I{k-Nc^$2wPjBb6BK+%>u07DgQb@3Dig@rao{9zCiiQA3Z|a(4uj;3 zqW(vRijp!VxWyO`t)sM5Ft2Y|&?;Dbv#-W}QieO)9~YmNozC~h>{dd1%-xI!IoOa# zmHIJeIt2qIJO>Ys3hB2}G`PjxH*suq0Zan-d)Oo3Q3N)+ygqk=plr_<4DzI}&AU`* zWo1=QldoJPOHhJ-JG-$5<%!Rz4t4uyG0)t_o~Hy~2%v~PPj(Gg?k4&`nTyFCBi3cy zfn^7lq-fcz(RU@1`Oq}*4ivC(o3vxLT*acIqEJmKIQMtS<|RXx0=70bfikJBt*xsB zxh)2$`zy=K)=<B&jDa$+l5nb&v$V9dq$`@NO2fm$S;D?GapYqaEA_+Y+~PA2ikwC+ z5*_i(Vg$kjflflO)=*MN$OSNmIu&K2^4$0D-&a>xVdeM?sy_Z<Z{_Z~J+6Ri(vE=( zSfp+3<t5IH`Xgq<!}P3DpEF9D0pZCf_LcrPg@?G{q^tJMRz%d92`#Ib)dPJ$QMsU? zKt5e9B<;KT20VCT0wb&TP(tr&NSi6Y<l!WJfkCy)#%#NJ!Wh{m0~`eS9`h16GqWqq zD5g+ZH8lP4Q=x=Fn{#mdg3m;Uq`f1~nDWPU%)wsf8tHQr`B9mTsVveAB}1cpj}oan z2^2N72r{I<W$Cbr&wfvOdOAXSg5aH*&cl`t^##vt9pBm>$U{sK^jiB_dvtX4^AcV$ z5<tzQ{ccyfd`lfB`ffDQObrbmezRNBug+V5B~UX-Ym7{QhB#A<Rs6jl^5wc>q<d|h z%7pqwB0tXfmt!TI-00}DvT7fc=SrUrXvdf_2W2h{vt((<T;Sj^($RU<{D@Y#nu0-g zeW537X=!QQmF?Y<M`%N*rrRG=pCavFZhr@T{N1{s7?h7hD96MQXS8`Vd6=#!`qN<X zm4EzW{|>eksiLn$-}U<4NY!e}SlkAfO)#OIGN92?pnvH1%d~9GytA_F0rV2C8|}go z5}AF}ijkfY9AeLtBz=;UIIkV|>LKJL;!Xofcy(uL@LYp(qSmXfyOHmdI8O!o_bgzM zX^iXv%=H)~m*tMsow3+TaT-_pQ-NQ{s-GWu`{Zc&Z?XJV)ZvMdt9^`dCh+tBnP?2~ z*jdf_(6HzE7dDph)t&UNV0w<#m6g`w!|tc<;SHGcUiS9CyW}})ohUz(+`U`Srgb$W zvfaFA0j^6LPP3pa19z7{uGgycD5Tc}9SVr4Sqk%}-TY=8>P^43P{ixk0=J)sWG+Cf zhRY8Z;%o3#*OyKr?GB@hi;Gkop7zB;ULB{Ir5V^-w%o<<_sCO!$G>iV<R>%LdQLIp zK}gz%ht~?AToBD<)vD4%Q`n)I*45F)r>?u|XZ7gtFcl1(vWd7~E2tQW3;FwtOENC` z<fTO)Y^L*beEfJl!l$OBB&yv!RebUn$AT=>#YeD$n%A6c8^+&Yp9ru}QBg_H?)1`c zS>n}cQ^-?esZoKWaIiGE&xXleQB1?NEo~{^zps<8iI&xq;)=vVE$}@4dvvg~HeTaa z61E?SN#P2af>oCMyF0DTLs>i8XOt>Wy}acf)r!>!Vn8^EB=QjHP$<A5=01K6<icA_ z+;>t{e;*wAV@ykQb~z%kT=3%Lw2Zf1pGGy|%SZD2tkSzIQ&l@H1#H}sxI+Nv{@GoJ zr<c{&)7Mg3)G9<U!lKn0sxTmk#c<=CoO(_9OEb&qbU#(6N33c5Tl}Hbw0pq`ht@ST zoF&k;tV0ihrp=Ecr>HD@6O3(r_3G6Lj*;B&il{rB;o}%#9T!_(SvjeYXc|CU0LG*G zww2}97zeFNMG}A9xdfephw0w6dO6xnK$%vhy8f6Hi-U(+K|d#0=vGRlW6Y!(XaZ%# z*pGLoBPgUB9H6h%Z}sa5C4^yKmT`zu`!k<EPmLf`NfO%&8{ZA<XJAH$2Q*H6`}S>h z)wW3c({X(5J>%*z2~p95-Sx+6XuT#wg(Sg`=TFH(0m6l(Jz?R@P(=q)u038ml_pcE z|5=---^IOvvD53Va#pgK8zWt?tQtbvyn4)5faQG96H(`Duz<SxLX5AljMP!By<$xW z>EC;vU6#Fb8`rO2ugxW<2yOC0+42iYx;cZgqLfvG3;76c>cfr=e+<&{sBgh3m@jae z5M;vl_DmDL)(~2@TzVrO)#8AhSlYjD7$%eY8!l2};sxAumHYQ;>FD@TkA)H*5~#>V zI)sFUk5`~S)Bl~IXQ`vA(dU&<hav%-W$X83lLEH;wGew>ZGQfX=11q**?<2032?hz z{X;YUb>@?VcFm3$byO45A3g(6A<Y*xPFHb!&)BG_xGit-vxEiD3{Tj>7K;aeZt|6g zc~n$UQ&Uq_#f|jmAxgb&Y?KBdG*vPa2Q9y6Ve{aKNF<7N2!}L8Vx`XtGo$`)W=W2X zjXgwXxY4&KapH-^0PiG{Bj)xvPrpyY!SO2@vO1NZhVHWzU?5-G+CTHah&f2w?Tj?T z?+(g<$&5qpAd`s@RvBYiwX*W^lYV7zE4d>gEDWwndKW=J;A7;N5m!JOE|&NZhu+@V zSsJ~WjiW)%%cgflNbFm>x$XU;d@YxnMUBeuVh-|~(rMzO>w4UNp@un#l>Xw<vp=#a z{xoPa9nGws3r8YpIQRbb#B`{^tx;TTEe>n|y#SXGXINNSAM3bSTYoBXVs~JBUOm>4 zEP0&ev!8WM>vM0<#aQCZPmi_Q2+6;M<c<xo@%U%wySWOxEIp^ggnxSN7zCLBs%CXI z3rS0na_(nlBv7geTmSPZ$&L`tiVD(NP}49}AL+TH_6~rf6Ex|pbLY<8%ZMDZF4N6t zkFc<{rBkL#9882(e>bChYU&XV9iNmW(bF-z;U>+%F6L@K_v6C*$4|Wi>$9kVrbsgY z%#++}RVf~_P8WHc%26WJa_S5btC+Ot7;!YDPRtejA_CVBJ|A&L;_&xvX$DG4%28Wk zSv8<qP6RfKs&_Pg(-oR|V?#rG!9KgeYhG&}4>?DS?S4Z&{k=R?qJWLOn-Om_m)E%j z9pja=iGR5=*%^dMk+zk6O>o#n#p2lyD>)K{?qmRGlsKClMxl7!j8lSgV^w<l_U)3A z62pR6Z0jnswVc|!%mq&gAE<ZMLd4?b97#v`5_V64;YC0Ixio{W@h|H6Gf;~DnY0Jo z2(Q->5d$%;zAOdp=IKwWlJt?5LqY}fWZadVy$;K#QiAI=xNCQQdc0LYv^Yd!pTyU> zu1^J?IyYe<dPBqetSb5fGqXK`tx#@vu`f@aL#eZ|uCj7(zAGI-yA-BVF7|+NAJ_|` z(tV5jJ7s_WFNH{KwoPAw#5JO$I_q@?gr8XdJp^y`-AaAl<PJVw-qp1=ixM5h$S=D2 z#F_ZGxEstUuBA6l0?RH3fd6~4RO*k9)_g$CxxmUQdU!ENCN(1?!&0ECw3LG1-9}&w zSG95Z*0mHxT%1x!nrr{6e!k|CJt3u}%KM&FOZwlX55DQIili{Ve^1^HpT3(Bi9uHC z|CqF|*&co>m#$VYa67FohqLqo{($!AfnmWpS+%4&VIiTE6$dl3jEgoA?NZ4N$UllC zpuW(mZBFVUx2Z8s|2XKw{S#mzAinyr65@b)cWhu4g$lxH`a{y*D{;og#9VrdY;h3l zXtHbp=}5YwAYnV=15I$#<hpC|(DKyOr6zo(KH$t1XjEMHK4p`7LFe;_N0yIVKv7<c z6c)Izj;3&Gc3`VYv3esdb`34-N7^-ex<VDlbNzbbp)P^#hbcca78jR(^O7PSARE<} zMV&dNHd?1!B2I|y-xkTg4pfSnn<u9lPQMtt3YJkvPwyI<rjru+EH__eP-@PgDfn+_ z^AreGKYl0^*nW<VzHXVO3@`yy0W`q8<h&`0#kD#-pw+Zko2S+E0mBOkKT-OWf8Vy6 zUV8iQ(}G$}YRolW5Irs;9|^D|Boh5_EW=z&bA?#NNqbj2I{@0^K(|@KrU=kLcnzum zTz+e5F<IBT&9in#>Xa#lK>^F?kCA%%ygE#T{H0;c^INF-+znT;O8r&)Of`lVGcz;L z?4dA6{sK1nxJWzRMyXOiXinC~H~&VXF+<e_4oUBtx;o{^k3-f;@85SWZ2_8B&MNVv ze9mU70|XVclvtwQ4^fSvO?NLIrT~c#E+g7!_t|%keNEoCo3nCo_-xLr@x}sm=v=*q zlo3g!ykZ#I-2M9VLVC7^w`}A`4el+twkO@VQlBp7`tXWNKqFpI5Tq*B1m`8fJEB?m z^wc`x-}KNlc;K~T8X6Ca;rcGaP4VA+@H2+2nPk_eBP0zC4JG!;v_JNe-Q!ks5)706 zm#a;1&(qt^ZY@~y&_pEho5KeaW`B&e&<L>D3D1!d4gn_#AWKkgsOZ@njOwg@0!Q*I zQ<SeUxN~$AFou8wwdD@SHT-D6TBy3XxI4b3gooJo-KXhmET~zl)ob6T9g^nUk_Hx> z<kHvMqOYma=Bmr2qys@I(ne7R;K!kiDJPy`l)b?c1}Gv**fkr05Ln$>raEMNEjK+s zKM!POvTzaKKQI7=dz|G7vt^#x#nH2f$&)utU<+!XmwwaQst7<&5YKFGRrWSlHb+BO zTbn>2Kx+a1m8h(BYl;e?%o%mz?a#{$h=q5Sse+vgPF%|j(O7B?KHY{eecs^d-FuGr z({P_ZfBqW(;m)Pg5db)kIy*am{lXH9L5O6E-Qk4&luP%n=lF(ykb~w47r%B#?Ux(7 zP>sOozu)|&7(VLr=1JpJulQc|44*cfd10boO)*9{;g@<wLG2x!qQ1U9&@U@1&!Z@} z+-pIxg|;31*wpl6?8bw_+sHdv0gc82Ecr&XmzhEv-vqR|t^2TR2i|)$OaY)rSomB- zOT-m>iDV-qBjGqDsKVDRSA*X<Vi<fm&m?!mcFfUIHvRtndtzeZ*RPL;1#3g}$}uzg zyi|+og=sVMKjz|3hshc6YMpxVcj+I#I{zanmXt0_DbYU<`6k>X*duNac8lEv;Q@Jn zWo=E;?azxjx2V>Cia!x&{<w<4Ppe&UoRm8C>XB-L_0L*k^-pcA_l`Tr??oaK_gBi~ z&%%PbR42~uIf-~}8eLIz&lyVh^PO2@YiUFS!V7dlzi6BLk=V1eRlD~gFm)94U|0^B zR1%r(`?Uaw+RaN+!n=D}`vQ#qG}wH({SeQLf`5Mx%h<xG9W&!8G5$MMa()PfAOl?8 zVagBuSrx~O3c44|>-X<vCFv~|50kj1wzj)i#?!K=S+aDWEh%xHK_Xy{<D>WY6SZ4N zS{zD-JU1Vco4NeUF37EWHJ0d)0j&Ty&ILBMhPFTu2Xk_+7D~5A9M6*eX`QrxKV1RO z2yM0^at)AMc3TCpc#yCAAZ)u4N^_v)($dm`%4jWg>h_9z?N1OMt6lQdPEB7?<VMo~ zYdCf4RLT?A)zMoRoA!9u#Bd;Mq0PlZJRKVc2M6%?*Ly6)X%r*>?0JeLYCg;SeU80i z$Vw>@<a;>h^>uZI+|f|55RR&ail2UTFbcgkD;e4u_mO<7pb%*)zyf>#IQQ@0g>CAb z_W{zS;T+k@>+9=_iqvrEkmg5EUv#PX;zIl}0NQUQ{-DF)A1rk*enu$BL9J$R$s1qd z4tc;ETQal^J??hu8w@f9bL#9>!Q|q21X2o<?76c#24~hq&k2Qjl|8%JN&H)8#PFHK z$%O%N6-@)GZ&D_kVx*>)7P+64Py%gmQ_3-3SlB`+fz`r}OFCu-NOODp{eQXg<bSQ> z;0+}vd#kS(Qw6y{IL4wX+_$X6*n=}`{QUfG)?R1erXnM)A5T`|+}YVN(-C4uK`Ug` z5O>xk<@&a9I6G?&s^g3!6zb!$wzuu(P;x+Y%TlJRKfLhZ*_TfD3b=ZGRh5;k`0o}H z8*$u{GO}t9ax@5RaO**Di6uTM9fluu>Kt!&y<ov4$Usb?&C}~joKYtN-C$AkEy3xD z31d^!JBfldk3UFBo--f~NrOrrr$p(#seHwrGwPLGI$Mj4tt~qomZTy8kFH`#kITcH zZ@>=n$9*?10aeq&f~u-**hV8v6`i5YlN)ue#no6R=0Ht=M7C>k(xiTzKEAcUJuVH` zxrbb73zQ*kDg>ElVc|)9W~r`C#2w-rMBu_6P+@RpwEuK&A6<4%Q^hT>uKIJn9d>mP z^Ydua`hW_PYr-pG7_^Oy>cK-YDK%xzgc~8&0a}cmo!vA+xo2~9boBf8*CLlcn@dGv z=iPj3&`y}Lk!-n3$sJ8j@ibB4Ey10OubGC)!sKp>h_Jl6v%S3yjnNO#Kec~@1KXF` zKVlIk??C}iO6s>N1*DQl9T5~Tw+DhF(Cv=7Ngc{5lQAlsdroKh1(gYJ-YPP-mKB%k zkrhKDBWSykSm5OK<Hr+?<VkZ^Ls&RC%60QWvNU;b$egK#rg(vHO^T2M6r<5Dn<59c z!S0cFKx2xKV8?Wa`B7P06Nw)Id3tTE8mb-`GGqO)Uo(z5D><X~3X%V8c&4iQ&VB^h z9w>?nYu|%k8*Y)<hpz8^%p5a}RAeO7j)JUn(*Tpn<@G76s6<3WluKRErS;Vbbrqv~ zgaI8b&}gbZMW464=*GcALQ;c0%C_igt_(bO@ht|48I<JbcbsWEs~y=pR;sVS8Jbp6 zTs*b0CuPPS6Hzwu#U)LH`#%V-;jHSOLB$(OEQ~IQ?*DRO#_@dbcS5-6hlq=cjJ@pD z2efa^ZoO6b^yyQB7~HXZjw$u)chp`84T26@Iuc_v@p0dDjbOqs<Qrlpz#{IFO|EJ8 z{F>iDf4^%|@B$E~0o!L9A)XST$35H<B)n=cwmg?r>>Jgf$KaWrEyXSiXmZ9eU+snM zd1KY2xxEt-V02V=o+S0kmXp(mJI1P#t99t7Aa!CDBeC`4d`Ow-DFTBQSX;^k>);lK zPv24FLDPWk2&I$zdwVvMpOmke4(vr_>x#PWVHICU4E8EZ*jMRucQ^Wud6b?Cc9dn3 zeXIdpcy*Qc-yY2pi;odB3Xvb5uzaEFP1kf2@vBb3pz3A$<CL(Bg(OTgV-~e^h!L!! z%Uz8JdxHO8smFqVlXFg&Z;|RmZ~JLm-Xz2m(CBM2*xmhrI~pp6a+W$Rn?zy*w>3u| zd*LiS&HGMmBNZjmNEXTf=_5NNsuOMoB^l|UYfQVjM-80@?WTKeqjAcUcnmU8*`i3h z#?a185z4@$lZpcOR+}dV>Yx&*^CRw4y3bAp%J9ZMO?iFuS1pTU4kzG*gClPo*?%r$ zUqn<Msnb{R7USa1SC4rwqs|AJ@DRbEc#!ikvZ;6Bv2CI=DhPM(k3b(+b}ki<F!`(5 zZ-$lZ{}!W$0K(r+RmC$yP4N1egnBbfdBLb)K=kI#+iLIR(r-zZGc*-KHDG>pT2_rV zi<pc2<Xw6f_e&<rmOoC+#HCzWiaT47*se^8Y`iTIH@63b1E`H*ln7GbK1j)3U1}iY z<C(Q%@-r)ZPlQ3Jm<jE+c6Luob%RWFjaR7Wt;w49%?ab2jIU%;JN;dyBR_7-brqT- z<o1j$*>xpGEFZ}GViiHYwkLp_{K4)5U5ifrATA_ci8G*)B=9w7oUV9osUdCGR}(2& zu7q~EOQ#7_{W02~SHP02Y_>xlx^ym9fUl**nOa^qy_um)AD{Uo-kkZHF<wbY2}Mso zO)z{WOc!M0Qtjp9vOU=p4C`8y+3%Z+M8$uzp=a^Cm2%7wCI=`;@5kJ4+b@5@Gafwo z&$!qSqqjKlCib>s6eCTT^ELCjEP;%Hd%`m4O`h;vY6?8&krkWeTLDzI1!pwTfzuV1 z0yIrGoS9xMB3{pS3XJs%BZSU^bY>~$$o&7Z0MZO!C+!7TY<|{OQDYT#d18i!hCl?d zuczG+Pg%b3>JA(@kJ2cO`^FVdcw<Eg1QG*<mj-%Inu>fEHE#A_ZRkHcuR=!|0L}aQ zVEWL4uP1+y37j7*s}M6#H1c{D;$thimbQSt7b44Q-P0|0Y4yH;cvQq3(K5M|&@tE7 zFwz8oG|bAuFY^@c5DaioL+uIr$7UV~&ZsA*rilG0D4?yTR*he%L())9s_3o5y=P%^ zk4uJz1*aS-=}|H|?LuGldAXt#2P+ykYVNkp$b;WPA?>?e<^K6Hk-x<O487;iwXWQ& z?WK>zf{4z;bjsS1jf2DPNE#h8)+paEx0c34&qL%FIMKD8MDqV~BGk}8J!nI@uI>HO z=Gg`Zdj8$wT$BpMcg7PZ{!5qThoq!!SFtxLw#3uDkBUI%=i%ck*UJB69sDBbX<gH4 zEi~*cb7->&aj#dkf;;-IsHkX{CHfy;r~vie9{kKfPz&$$0jw#AJWcj13I~Vw&LtC} zgnZ4XdP$cpHX0aY2aXH<crX2X?+PPz(%i$J#-2PF4rPXaT9es+bpR~pjh%LrU_k!! zs3_+AP-WrB3bbZ0efs$U%#?<`ZP3^Kk6(hPu!4|dIW2peTn-d%8Qw!R8|qwcKt$aO z0<vl?-g%}vL62)NNZ3Vi-lSZ&lIeyi-Xr{v!L4LiefX+goC&lm;GQbU676#vE8Ro4 zo+Z@^6_Y*z_`w~$$U=>LdHIgor{1UuyFYtk_6O_JEk{!;)V|GXEMb)XzW`&f3fnrp zd2|}ZC5^uDAK+fHA>8zP`ZCQ-2PiYJo<o(;k@xX5YG|;oH1og&(Y#XYy8d--M@yh{ z#3m(o(n(})b{2rEW?lwv@x={H9~jc{nI%KkJWNX{GOcO9R8sOOQ=A>x2)0u;6jH;W zeay_5F-!6?g?e4>*YeAJ5|WmgnF(o<M&m+l9#Ewoif>y4YUX*Zlnz9;npUUH>*_vK z*35fnc3$&*19jn<5_Jpz6V5!i20EM+SHp^nUcTmunu+>kYh5lI24F}o{cYxgJ)x$& zTv$Y8cxp~93?ZSr>)(DE_@(|E7S4QKa)lpx+@iUZ4FqYkxsl!7-Of%`dC?Iq(z`v7 zw0Nt4qNOd}9nv`Emr76i0l!=pWGnwBY0*8_1C5ZjEY*$A6wM1CL*Y^6?MFZsLEkx1 zhLH?ht9Q4_D5<E({YY?IE+l@WaN3}f-@P&@FqE2+66Bk?keYh&U&->3m~AL(CsJ_y z`}d$FgOp>C>FdLvnU!@rwG-r1cx`SvUAv`!kBNmw=0qeM_*o<oP4Z5&YF7U_=lk|q zcu-akb)2bo^l0;s?T0{WQj|2gqk9#M2|NBD^}t8e`M~}<CW3769>a@1t3bsDFkYP+ z<|7)7@hoA0QC3_)6=cV*n4swBPLSe9_iJk&RlEW(jZI62lEkO#Xb9m@rW4wQBqb#! zB*3|;-$+~=t$toG<|0TTaGw=LImiQ$Oyt(Bq1O)cKLUv}AuZDo2LSUetEcokb+XKa zOezD^luvn)ChpPj;ZH1c7Z<QH%SUM2=*OzI63ld>S_OoK=j;h0U#8QBe5I7ovf4?Y zeds<L2mj){xg{PV1Q_G7x$FKI{HgnDy`ZrDNS2fYvtLZi({-Q(1dJhTn=gfazBgb& zVeuixfy@;^%Wk=O@c0UXO}XIoZF*f(uB-megkvSDRA2irW7uS$iCk9AQqMvMFyoK* zcHkZ9d_7jB!gfCoLGuStqibiHE*1H))s*GJh0-Bw_gZ$W#)!?kT+jo!qje1o&LW@< z-|n;$V7VG%)i_4|D~dwKAM+0O`q`L2>vIHbEl6|;?O^<EY;15HaD>Ti<kj!z9Us&m zSC^L$yxPM4%Qdp%0<^}l(*0(Rp*Tox<7*&24Ot7plP`c28Ht6H^g&S9n3B|e9vpT5 z180OP8r}^L(OoR5z;osrAzUj5s}%XM&+3X|u{sjI0<|qpiH8{9J_`wgMCE~qC>7r{ z9A53j_b)3;v2}8-q8q-YJ4^A(7(W;<>Wt9mJ-4~^w~OOQjaEe!K(>IuKls=Cp^DI` zEd>BZEG#UXxb_J>qzV}i*jk{eLYI%yY1*h6UlUGfzplFYVEXI3eppUGGW>CM`+Ws6 z*JNzg=|j@W?ZyqYwRby?4^^D43YNCqwbk)(U?e1dYOM6Fl2{kDnwP+Kk+@A;Nels( z0CSB=L2hR=m}+F+p6&RXaNP|Nk=!oJMLPv%7MA_5=jsaUPF|mb+>EuI?=k!Xy5~Rt zAe!(e)+$}SYjwW*RVbVL#J_+40#}rN{s7&4Q~Vu;G@ST$?<kLV4J=`DSv_M_&Zo@5 zVy0x7e>poh$06ogSYJPGNEQ(u4yfXHdEt|~WEF|Cg}i($VW6^PKFINupic`G)aPbK zEpEAk!>hqH8jA3S%v&m_fQU#NUa4u^2-O6h(o%GYVq`DJS7<#A4N{VqABqFv3O&bX z94`)g;fB1+8O7F8#4`d9ID`0x|8B}}P~+-gZx4Dkq=O21Mr!PYf=mVqjEwa44~IIu z`<Ji!bXGLr!Ak>sA7FmT5XxtHmAWp7<l|$6^x)9YEdl`|Yzt0RY7S9YMJPB!kzEGL z#$csONI)8*)^H7sVBMa&P?JHRS_Ow~1*5ed<!;aYsOvyRfHT7I3?gPnuQj55W%G-) zU;n3z*xSERz_NIpauxWsv6rdYTfMQtiTt|a0$~k^;ek!w|8$!XVNyPlu-*z95r}K{ z1VFT!d8OJs?Gk3rXM8o*UBwcW*H%_Qc$htVN@{~`n^Aa;9HMuRnJFLJ+Lj*-_#6!w z@SY2w&Y~$0CMSf`)6v19mBOeV_en(uWj?7jv@6w>mzTfJbgx=7#0l3n!*AU*2!(Nc zJUD~wezY*9A5~}7{Fz;>uWu;anS}P_qh$+tjTmAY@ynx?&&CgDg^v$Ajz7%pv3w>u zfAxK2`H*!%?Y6GZY#F6SZUuyS9zVVur&PAPTKE&v3_=S`7gUVdTJCf9tc;Yy!ORAz zl(?D4uAA>-Ya0$mI`Y%N68t$>LWodVDfg;LIhPw!`R!{v1)o3v5gU&wjYz|p^Tz?z zgFQcaoFY8Vq+taNnhLK2p_fnd@ID|R(L(f(c-~l6kXZcq@ng+AFsA$uR?hkTKqy8+ zR3%mQBoYZ9)BH84N#E{c{4w@y17fX~eO?PHkm5nhw*UB1w>O&rV9efLwP2upBwNeh zY)JU0EAZYkgk^hzwGiw59#AQ*O3MsmzdXf+OwG<_j;{rCe|8`40uaR?_u`9e6W`Ap zi2em9yZWQ;i3v;-UO5XuZm#6Vk5BfW8XXKna;Ph;1yAyEV5!!yU|`8cy2rX)CI-lI z{eMpMnOAqj*zL+?`7TGbf_8Hr6=aTL&P@bM6`X!=OUr*}91S3QmbpN;pO%iZ`Y$vp zm+p7Ll3@=LY``;wh2<l$0tR13x_^+P9ii4~IDjE<B~LJEP(?ShU&05OfR)E!_@ko( z2!M;8#%1t6pqJ6S0R=lTk%FRM;>pw7IT|8x2cXbf19u8)GI3V+s(k@fEutmiZI0SG zKQ%P?<aEJJ|MPG!U%t$(bU%?AE$b$hl&4hy7CK<!va@}OWN-9mPvpgd{Co$ttRTgL z#nYk9mhojHE2t)c?KZ1aswRAA$)6vhYY^@;VeY@sB2zMS=uyAp!Qr*pwYX?iZL=WO z@JAl^jfaN^<hnNHS(gMC^OtEzJqD{?&Bx4gU|2#;KT8wXbScV)6Mw&lGYZsTFyA-6 zg|Q|&|C3J%?xFepYT5<7)R5+XlD<6wbx;mEjRAPs;550!CocdAb*j{V_Usu`=nP1x zt7Fw*Apa>?!9sonvKU?mE9K{dNZn$ESbD@55GMR_3X#iq>bsc0VVvtoURz!Lit@5a z#9NVLN0wbYq7l#mA$F3|iD1R!QBh`l8Mn>ys8L?Bkoj7x*`Pb*&reRUxna+ioRARO z{3wD$(zg7%72+lLy~uY8Scnw4B|!%GA@_h2lT`lRFI+t4B@~v%#zsJx@I~ytMMQ{r zX?O#3&<)hHzlVD&UW@9s<sQbyDa5pyGg1tQ?7Yp4vIx+7`1tsmK1$=6m!}9Bs<;<T zCZK`}48B;UB&dNh0#Vk-1til&g&$CVGtVOA)SRrXt-Za0jYAiO4AoOvG!9w9lW;d< z|0yrYjH8>4&Go3(j}NZakB=|Nk6JZsh)FZRO3A>L43oRugomRA>k)k2jZ0rTukR$E zYGwuo`EG*wX=Yh9wiZYrLFp7@r&pTWX0f~GD-WUP>gQ{Q5lgezLKtZRfv$5$4-XDn z+NC=#T?~1|8yn!w?vD{;?>IaZ+uYDlH@c&S9`=jBXPn-3qOQgg1^M~;MMXzgUb#mH zV30v3_H1kS$xivdQR2K<UQ<~3=57Xrn>`?P^8ESp`t8z!`}gnPK&c1ZL3zGS>%zTo z48s;`=3Rgn3k#r<z`Z_7!<thh9b<+gzQC>kNq{{CN{6X~`$b&|Qcy=Q$k65odiUsq zOte1hL$d`jSK&4iWyToXbVEYoB<KyB#C`7VCJP%~3-l~-eFsEM=7n)5*)KacxAQJN z+XOrM%WQODeheca+X(kln+LS8m>Cv`I^qO&fR1GKxIs&RNS(wJ6-r{UVZn17oBU8} z{r{v99NOQ;3zl6pw<4?=-ro!{R-9eM=3S!$o2GW3f-ze5e2^PB*oHnw&QYz1(i{no zJkt9;2KqaYA_Hy{NFgK7g*(We0RInmjGUajwDqGV^0fwsOaKyM4zPvwknnD3Ao<v3 zRpG_g^r%R`&U3FZfNE?q)>J3vBq<5{y{p&{2MCERY;0=)x>Us>&SVN662hlJ53?<o zzw|bj8FV42a}LwrIy)Jb$Ce{{dnxlZfn95tsK(?%0o~qq0cqEizsvH=5aagP3&+YD zQ1wbnooCvjS-ja=)+Z+?K_8W7P`-r!)anJ<r<QNujQv#41^eWa_O6TU*t{_KovMo5 z>;xIQgXx7836iTzz<`G-Ha2$L=lF;i%Qw&;fuw0;37chxfO=c;a>l(_h?E2c1&!K1 zf6BzNNrsk9#kE-rLBIWBl?SBh)gAxo3NR?f#th~SSLKOZ<m~;ZCM6XW4>9%_Gl(c- z6*+tk*P$8tHyXD`c40EC`QyA_L4}4OP}u9&0R6x2J_zuBE5U$}<{saLtX*)E$=O`t z3*bxvng{C{O#|8IoqN(*WVtt)LNli9bYh$dY-jm!ME=+7plV5YAMWmNEtQTz1+ImR z48kA790cDN(K78=`M+GbyT$hu3g7daj+LKqG_D4_`Xe?2SKpYOH+`BU;YD+E(`!2k z1qKfOHB|MuIEU&ntRi~?_g6gJZ_xYlKYco^Ki)ZpXX)z6DdzR@qi%yWPme-D)*FH< zskq?+N<o_c5_u1D5IMt5H&Iwc85%X4i#VJy0P^r@NEBWUNlU|x3HJ{$yuniEI|C{Q z`Qs#vx#M?-t6B>9J%1b?1@&#uv(7y6f@0)LJD-D*qj!#vK@s?^8Hv@2iKruKDui8v z<V%pXvK8{bh$IO8ukuuJUtCWqQZ+7Z`M)bSEU;kyW?Npu<B@sInAyC11Pmh#5=c?O zzyQ+*cZkg9&<IW;67l+aEHR5Sn@+gr%eR^{g=J-BB_-qsbr4>Y)R3Ujo&enpY(>z< zi8Hw+)Q>DJk;8xf!u-Ju>-g@sJW5{`+4?i-hEm!3lQz-Qb<9Dx*upIZ;#*CJ`7iRE zgephJ$LHK6fapCc>K{qbZCEw)hj@4k1gD|j85RWhT-m2ytf8>(_Dh|=bE%8VK95ue z-=L?;D%KM*O^~>8<E;{>Taulb_|@c-LlHg=?g5_p#Z7JE>SsqFpniU&+Osrx;D-tM zifY{RdT5$Jp!eVs46k-`{f<jL*=selQJZoiH6gs*dM*X4$kyWbd3bxf41>&gplEaT ziwg(<0gVlK_>91o{H$zFNyI`$>gwtOb@*^zUNZi}2dC;WLH&;^6IP@(fzp*hZ2Hvx z_SIvc!?V#1_>?{-%>ZruTxfGlY^-{YM*RVZ1A`Qhh67<j3esW@#@5&J@SXRsD0V|M zON@Q(OofiGst#I~FeSA3FU|D@_K2)WM2s2e*K>}y-asPpQf;RGGxSN=UMRGgOx6`x z4Ty0<BRUV`Sx*hhpAT(zdHy^rGt=tyyk(G%0q~xX=1ZH#0&l9$8Z&41EU2PwCZZv0 zZe%n*IOrRwi+S0V9r~P2AyR-DY~)4&`YI}bqeIeqd$OPurQ}Te{GIcGd(Nw0Vc&mk zllqX~Hyd+_RDN{U?3EhYxq1vB3#dSX!7~hl@Ij?86B~PxtsgN+@q~Dont5}Zor5De zDal-iNlu<6QpA=Z_4IrbzJ8qJ)t!CB=X25T_+7`CMu8X7(bG3MS<*^=#@)U27Sdy2 z>qg(raU+^<0MYHUg1ic30vOnDH|ReLxI-klNey0VN0Rt7;d9VXki<zi8pzeptG}!A zYXn2mbo0xC7|A*|K%D}tAEyK~>Bn&Y08Z+lsMYk3do3W_#ESCh%!RnPI2iMQ*MO1F z(WjhGhFlPOm{I9uB3DH_SLy@IfIwH@ASUDyocNY$fQf8$KqQ%%a~OR&QN&^}SDz8s zIHPhLQyx}kiJu3=me=RKI~i<Ovo4q1@G!MIdsJ>wGi6{2v|;El)G>f`;3@#0Zv6#S z`_fy>j6fR%qKSv-pHiuu1py%t4W7xK4u^pjaAn244+-oM#A5yX45XKB;kwKrQG%(^ z-ESXgY`|1dQDm1YgMV{BB+YX)a-Y-46!Hzjph<iC(8~5|zcK6i^Ew6wwa?dnZY>SK zXoK<$-g*zHvcW+(8|4@AOGvU6`NhfWJSl$aZ_;pUwL9z|Q;bRF!N2imczyoM$lxs5 zgO<W8maeYMlvCGMT2{(FFZlM)M@L6zsU(Bj&_rRPeZYx4Nrt;rorIj&Jg-!JXb7|N zuqNP90WNM~a^ztiAV6VT!6YQkKt2tKzH8FjDmU-_9*iMr`&s-ddFwAj%!)~a(8Pa6 z;vF?Cl1z90&)y+~S7&Eu^O)S7Zu-%0Xws^COxS_1hF41Os_8}ESJMz<_ks5;MK7M* z@!UaL0cgOMdtM)iKDYS!$vTP+FRz#Y$OJ)$vIz#UXafDmYzx>nX#&4ex4!*+2~W_r z-0|wed+Cu>n*)hi+Jf#jI6z<v9|3Wo@|nE2`rk0JFC0ArASgp#-9)4yl>SSpbisg8 zh5aOqc7ZqDSAbYf5<Z=z-JlnJhbW~B0d!^i;^0OHPh;Z-OMyu25CxZEBsO(u&=@a8 zHe`$koCX}-A!E2701!a}(!7gaDlKPr?MKq@x}Nf-qkS`$of$zd+Xb~s@~}zr5OkSq zfD&0Qv(hndl3taIXt^mW3Oc#&&BOr;U+SKlVK1NK{Zyipti(^4IO?PC)898C60yl4 z658Nio0Nu&YIU0YN&$kL|MZGb?qwKbfo#f;A3s2akc7E0t&Pix@)LKP9t(D>UnjRb z9f{59Sy&wU4C;{q?*Jr<<iBVa@to>D#wSv-mH^zRBA4v~;RPZ;G=aIx^YhGPw>c}j z_p*~z&r81vAyV`{yIMW=H{Z_d-V6c=3a}f9hw?B*v^+kUfKl54$tR#u^~5yiX~Ji* zi}xxUPh9VHgl16PzDEJk(+6+l(w|JoJoCqxo124a0rcfI@1T^c7&~Dc4jl{S_<lAe zAsnD}NLrGZn<I>9smmWeygSiGBK}+R;a?_xTGcKaIr-1ZSqVu=hI)FTa_O}7yE>#a zcAYQ8Bqiz13cclW-K5l<9zD9m61MMIXSE%X`>vsrd=z`(bv%UiWz{;%M{D?s3=57I zwd$*iibxZ+?^hA2eX|(Dr=dT`@)-riUzcVOAH?)AO%Q(IU-J;b<96KTs?olVpp0oN z*2kf9wX|$$QylK5cS(hP95Q-@YQo&?-PfcVw55tOjWN5nB%^CB$B`meSY1uTe{U6~ zd?;=FCdpssvpXY^ISAsi(}WMsdPCkRFk%8+6(WKC%cL9-+!IMeHFXMzi;LR`upE4} z;R95^yG!_9^x?40g^}4EZ~RHcMx5Ec4(3#B7P<tPQAUST=hnvSff@IF)YH%$;78pA zw148HX~-IsMwF2&*Jo}2Dh+N>rSaGOoa=gO&lERTRy0k|Q~>t;H^UecW&*|h);?gh z@Q8@#J=&hJekROpEdh;~4`zm2#W9S`xfO5pG*z-u;>-+qa1)|mH$#<!HXNVH8r^b9 zk<py`wcO=TeM==;?S(_uh2=10BXnl>_kKay(f}I;_6Q@IktE@PmbC-rwULTL%yYM} zEDDbz0pc9d(p1dzvc9i>dPsgLwP!(}cOa)_f|If0$ClB3+2)xf{P=*&2hr!Y-rj#< z8q4AmULg*WFd*f|KtRZ)+JN?kEQbXdk71WpDa`1gn&5^%HtKgJyFJ77rxmX<{#&m% zAS8rVF>>p<gwR=pw03^K)vQe8kW33j6P|&4pfD?){vk-y`c+N~FggJHZClQ9U|@sy zGtxcIU@tgD?j>;lwn@q5?)<_QgRGufE18N1vHg|;vAhNlQ>6@J-?dw!T}>~-Aiqwp zGayK1`^`^ROgWl_sUDrP=8^IW!75sEJ&?4!)_`B`&)4%gZ1Fi1_Sq2TyFL*_aY-m) zr90yuBqL1TZ}ToJrYn-%h&np*3Nn$Cmyf7RF(l(KJkBVyJn!ws)-qYYB9z56zvW&& z;zbAvx{ja=CPz;ip4Hjvdh`anu<i;<SjQZB;H#h)$%S7XpFO)7OT19+njuFP@YyM_ ze33f(G$QB4TLzP|kq_>WWrgI9G(+c4%OY)<3S|y@Qf}aN$Ow4A&|^g~J`?iv)9bDn zhL>{bz@mOQUJXe*#|u7x6lTNaOZE15#OL{e?83+A{Y_PwS5s8?9{N6mXv5z&t-46! zOx1+&q9z0429)Y{$^R4*6=mhGoMFazT*#&G8NHgJeraC(alZ)Q_tc{8VHTk#K2M5X zPOsl55yjS$>IE08;qw?MFMQm2Q_{YgYVVBky9LMl1+^v08702cyAb2blGwT$(D=K1 zKqV5KdsTE;3%p@5m_&?HWZ3_l)Y|;W!N$hs$&*$~fgj($*SuRMG?U-fJgH_vv8QD6 ztqe&#G#i!?j`IUK*p3k!U5=IjllQ<TANSpCsKy}q2?Wa%9n(%E5yG%A0(Ua>LtnDW z)dO2vFu+O|9C5bV<BH!!1<LK*!j;pmBNz4(LiU&QbQ@E?jkq`M6V5UR74dlV?yyrx zGvHy=45<(g_8eeU$QQSoDi7AfF!k%i7c!}LF0FU-564pLpLYASd3|7gM35O%h@wah z$Nfq|?Vjb1_;|(}oHEE(R^8^SA{q-NK);7<lTLkDXFTW-Fxxo2z6;ZR-3yy}^O+}6 z(vF`$4e`uhhOC2=XX`P)@7Y9fr}BC>UyJfe{4vtD$Hd8$_ZkCJ$*_nyFnd^9ni1KN zL64Az5*=tQc<yRy_Y*wxruz{vGwIiVEW@co6yc5gNlj$fzxS2QQ+20dd2Ot4_Qf<* z{jh8{vukcSET0>Sd6orKI~+1w{L-=;jo&9Ei0a%(XPF>Kz*-8hKyoR)YaW!;F4a?) zegk?&QNk&^=7dREQG58x>`fBcPZ!O#-g~|ls(}q#ABO~3XumuxlS@yJP$xyWRO*LS zr5F|@Cnmx?7$jhwowqV>JOIDNy*3T!Ck3Ovz|M2Bco5<ds#|0?{s})L^=X;o#UN?S z>O&fXtwY|<6__YtwY1^b0(TXle$Z0P|CFKsjbcV(>3uBV_(N=pO0<F7XUO|LkG*<I zK-Z&%oPCGEY5-KrHr-($CndDESGaOmPz0F(LWUv0c6&U`42OqTjoB*L!VD@w_(`d5 zXmi?0=vd%>z!_UszeXMD5Bf!!C91dj`_Hp$`G{uqz`p^RD7W~mIPxS>s;AECEXdLh zdZ?^gXz2fJ$*;FRWG(>5j?a|NBtMRl+>4q$@4&<T^aYBN{Ck^o%z&)KlX4P`O3%WJ z;53Mx(52`P?(R&61v?-^U+=`GJsGwtWkkFs&a}-GLs$geytdPXWC=;T{-+_@ip#of z<&ID2p(AJI_26eA@fYb0{$CaVrqymrGjOwwEMI`#1LK6h76ED!%;Uq<3xwXlGy$c| zgt^6`lcke}?DoZTQ@<!bpSHTfo$6SfeM(9VZMrjTCdGmDA6(gHQVEd$MCJd&9|Hm< z1V>}cfcBQi%-<!a{P^xke2}iKg;}&;xoUf^WYj3|WSF48^j6`Rc@*03s!O%JoSd#? zPRMjcqcIS$=0{y~8;uvgT>ZhYsS|dRk9t9u`{PelCnrTjEsgVt4H_<5NSbDz^P@*; zr)EEho@Ne$nG(<)9$wby$C3j+f}jKhCJCP#EsSx$H0$<lo@)w{_?sfP9zK;c{R;y) zAcWom3SFu@QteJT{0A5E=imtJbXt2J)qi20h2eVBhc{OBM+IcRnS|cw)Ep64T<ZVs zFL`yfp16CV&HMray}Ubmeod3_lamLmN};^*((6CDxj6d0ZL$<)#kpbv<9QHp2}v6+ z_WWW2uuUrL)5`dn7_)J=xkL!}M@(1fYExOrOxOemwY{XVlG!53byz7&3nnS10So`i z5W~MWAZeG>7~i%BLOirY_(cb=QXS0@=f0TtdG>pqh0%DcQp$kT+r))LWF!_au2Cs_ z>K2@nlizZ1!WVWLqA^iv+1snjd1s`??yhN6x8J%cMFQ~&i;|%SIZk_jW;D1TW_;A) zaQ4q!fF#J)hcj?O4-acP=0uw@%sQ+7Chn@W9?|Au`xg}<DM@al)=5sW-tIEecgDA; z0I+EJ#d^eii$qXE!vh7L=bN#A3yHE4d$l6Sl+ehIIT)=|Q<ILwuDEzWuuBXE_HP22 z8qq%RIt!|Aa<XLFnfnYAcA^Fd@?!N1EY~k1FA*Ifs|a$?g9YugbcxD4!%qvq=&>h^ zx^K8sf&MdQdv^6f(83oZhW9Xh<hnfik0V9mLne3XXv<0s?nE(X#F^gfH$Mr3Zv;`q zhYH3G$NBu(PKPe@v45{PrA<>%um0ch)V?nwXAn081kkVUI9BSP;#s~AiAD2RIq1KG z6+^c7Sb{?0U8quF$SqVB`93dss~K<Y?k<9+v9Yt;v0#lS4U<#nVVYhiY$D4?K0KCk z(vdf8Z;P<;!zvbzoF<!PQFv*>+`GMR6-`U^(GDg_F$}ObJjC4J9vLD5i=@9kh<-hi zxuwN`2%knsXY-p&(7a58dn4Se$}OF-31JclmOd_ePt(LEC`VB`sq|pM2C=KBhGuoN z4`h1+xlSyj7uTp1n+xTP<1NNlU{scftO%{drfe5@Y>~(+2#Th`^v}tpp^`|Vt?@8* zi$i>7-(l$UODRt){(NpLW($)W7#O$}o;TY9Gw7hlX$SUPL1?;-WovN5Bg3b`r{4{x zNlKNR8rPPR(%MT;!$FFH83mWps`&MS(|!}ZB&b1lF1=&42#HbK3?ZOI|IOxt-6L3y zeM%B!0%NT}#B;s8o!O)Wx8@J7hD<>XZatcVpTLX)k?;N79te$2B4bMTs;5vWui=%l zsn`_UT?ntducV-f#7GQx$O_YRGC6FnUBv$!9E3!Id+pyru33o1FD`PMUQy5K{rQs} zmo2cco-0=X6Bx&e)%m*=zE$@9Pz22O3*2iBvwF4??T>t;$qV+ibYj2~WS8)C)wmC- zFa<1x8tK-kM~8+Kv5f6K>)+~+R_ufqy04RnUG4jw5943(k%*ALPHr_dHq*aG>uY6? zWn2)Q6d|R}<?_h^|1iiz;J%jr(<1HBv9YZ^Pd>9sV>XK33kmD858tkdT+@hjp<I-S z&kakMLi)oYKuE(nl`(e;X3ySx$Q=z&#wx-hp=BGp8=&w$mJNs45QZTI*Jg|t_&FS8 zR1|Ibc#5W5t&kh$o8NcY#NPOqYl+Jw@Dt4etq2AFoND{Edl?UK=tnv+ChuWZ0R|6p zr$J}{{|bMy`=2LUS>P832yKVtlRCA2x$>Msoyxv5KmWK$Q59|W{Q1t&&iGn~P%q`n zkPeMEtf1Fe3KVHC|60UYHrBzI4((dpk^=qcw;+pbs#(Wd??qJUJ>IIk7%+Ce4c?SO zZ^}fn0+s=BOK>d(7lJ_oPzMpG=&6oKbi?)jog7z#q&XN;TXBRW?nBYaTt_&omx3+! za~9Qu;21E4!jvk*cS5yaOZo;9^=Ik&;{vQ=bin6f{Z$iONTd>{mZl~&3>ZieHcvb| zaW>vP6lN^|(hMw}nC;H(beHX~>2Y4H{qDe4q2Cu{0h43@4S$0(t#4=uzeHliWry06 zQ);DrX#5)TBm82EF<ZOdT9H=``Eg7yHenD0jUXMhQ@7KihwcCqp`g2X?C`cYV6=0( zEcZtOIznP@1vyrJP{4vF2a&}}1Fq8*6nbQ4I-p~NvJ2Vk{+Ub2Nad`Z?QNLTkUad| z6#FMR5^HwyixoyBQCif40VBoU5^87|13a1Hc!SG%j2!i!8HVf-LjU1C16eiLbS$IW z{^r8huVmOJBX=kk{+RblnG$|^|FqUd(FUjCQdL0>7rb+|?lBVOrA1rMgZS=`@%<8J z^-M#G2+_5T<YP~8SJ(x(YM68N7e919IBV=I9m+}riVPSyO?Xhdl#_eUhp{1~1=4>* zCeOq?+gvum{I(i8d>TOOrMDFzI00!F?1I1(7`-!&&T$huH%Y4NbdNv6Ae?P^$M^5> zrrgEUsa%c!T@(I&Jt)97N26_S!|I3HtF-qDSh!jh6%{xnWJ~k-%OL6eF5>_padyQj zE4T~tt^z_r5bb(uc#z^k7J!#YzXTb2SasL~NP6@JB3vbsJIqT?)Qx0GA3Fc&QiCiG z*<_SIh9#QzGrTEqKs_Zo97YERwQYJ*ayjvZ<rn3RQs6yU9i<pW`&Qmenmd_DDl7zN zuD8gl&X_YQ^~wJ!>AVB6?%OCHA)%5-l01@-m86n!XH!!4x+N=_nTb2e3`Hcela*u> zW$!#Cgd!^>3E5lbd-VF7a{tEly{>be^AT$j{dV8Y^ndt>s}YUE5XGS+p%-g>Z4s}h zv3RKX=ujmbD0%`uwbA1HO1+TigFJ{G(D87nUz%H#=p;p16Wi6dI!gCNoHd>lp^%ZZ z`e{a7LL-LrYW0Yg-{J*zV%@cUcaKV>>i_m}xsb018;0xDHzJX^pCTzgld9vHuKmeG z=ND{T++7)(JdE%UzkBx%7O@Cz>9Y*X{oQU|SG%>6Rep0II!+_sn3<W0v4rjM5eZtu zkrUsqpz;M*I33)fVqq;|Al)_y$({9mb5oO6L5Ye2MUGjVjpQ>ecNVpxK_(HJdehlH z*8x<cL5Jd_ZFAe%`ncwoU###rK2A20rBys=t%!ov-Hj9odLP)$ex_Ys+wC4*c-FJU zt2CKM_TUSeiX4GPj9DfoCRlpLTwdMSWU?HPzHp&n`m-a{^<<v2Qtg~dJ-xlyUt#Zo z3F3Hkj|jQ$Tl>yK4LpJ5eoUb+tP}nA$d?#Yx=ueIm^RYa2YTW}nZ1HXV!qtZ_`S^6 zEQ=1L?a;DHki4hQ!-$=fp*D}*Vy^n`$K$zEwMGAw=ZKd@-5?^m)!)|_TyKh($3936 zzd8iRL}K@9Sth;EmzQt9Ok-QZ2m(jV{CZbcbd9m7waYlQ!EDiD!g+RKfrv{!>}Rfb z%4~cXng8n$U4|bw&n4@cVy#vIr_w$0oo86CSOnhzjiaIh+53fsGmT&&ux;|hsTlK6 zli82EPt?fhhH=i6@=d(Q8q-^v9eWAcBsf?}gdG0><!#16znSo&*tZ`wDT-uZ70^&( z8bWU&s5?yE`b{w;aFw;S_}92rVU&sHLl(VR26=V5fI7*9X~cCy6Ez~9K*gVQag7|Y zqT&g(4y$&v*|GKwS<32auX@uYo{So{;<>M0P91|)>3gL6099!=*XB{;Qet@Th`E@a z`dgK!6g_p}HTG4#S44(aH!OqgR5ZynQ+?QxEQoyxzl-l~gD2eSUr?!_>P*z)#Z3{o z$p~i%C?8b5``8G2*RE-s5ImXbtDFnx(~m5+^L9`XBjJF+HKX%BT4eAra=eON;1GU< zZ=p0%ZESf>YUMj(r#q5l5FeyW)d1Z?ol$$fOFwPrH~UU{X7)y>3JN>p5E?PgDgkPr zo=>IQ`3kuA29PXvD6uY>p{S;Y?O=*XUiZ^DYCm)OY1(VuS$(0)3SpcdqB4FLQt_Km z%24~sBK0)T(?jk(>e74LT$<1O<=E63gRZY#V8Xr>NFbp`DcpR6#kA+;tcMgu;_g9Q z#?yew2<)TSt68f40*&cfmoht+zTcD!iXtmyus9}EPAOzI@TzEMpqx-pO4tKNEeX=| zt+vM=FehoP=%tgj7Jv24Zabk9Y<ud{%+(rV(dRmYk>I4bjBOc9qD@b_+8x>D(R=Vp zjT*qjxs5-$<8LCKACqMMO8!oY(yPeo*6NN|=g2ASUUz3@WW2<Oz$@@$+sW%Z>?1{t z#NHc-pZ{K3-6q$%UPfwb;-&CUJ7|`{;v~h)28$x!n6}T0<$xm|#YII3BYz$%WXZB- zODx2$MVF2GadKJTNhe`%{;~%wizZQxH}njW!@~9O@z+-z+388E3%u_gp(-8lqT1nG z(@JX_@!g|rae<aJJWUlx&(aZ1CVMEEC*$2L2leZMf_cQivvYFH3y2ZX?JN$g*iV(6 z<-g1PyS>>;z>wSFw5fUMZw+6<fu_?Sied3`T=n_H^t7vt+n=n^8zmQ#N2xr5wS$Z} zgbL+Egv=3|K<u;WikzU{anX>gdU|?9VmH8UX2Q0fiG{TPw#(_9E;V*eVsRJN?y`JO z+uWo^y1C{Gi$)eAh+b1QHDoRGlW)?lfs-)l((>t3$fHLNthflJ=@*w6Sc+Tan%P5D z8CO|u>wCSu+U4GD_}fQd2+BCJQ7G_Oc=jIQvh~KKD9Xwdbi>ke6oy)q^!JYLCMGr0 z%Ltj&9A2dGK3uk<8usF2WS_r5p7{)+s<5yS9cf`<J7G+xgIqx!Bj_9bL;f+(o~;8! zbPk*QJJLvcyOhBLH#g^`Yfh%c)zhy<PiVQC{dM&uMbKbURYq{gUL9lK&c3d$(QZrZ z4%WZgZ+yifhd0o48=X=&c);HT<q~ZlKR<up1!KB8xM>cMdv1&yZS^l#f|;d0&y|?{ zyUTVqo8p6mr2C2k%p0h}2`$Si)pV4qtwANX$G;+xTladR!GdGkU4|lVbvLWs!Fl|9 zTfX!Orbh6rTr20}i~8L%W3~4NkPVgP^;ZUOG$Hcp=m-<%+Quesp>u+=*a4Na`FPiU zfH)LW97&|;8{O6(m+0?HxK0t=7GA~mpQ5X?jGg+?-w@@6kRN>E#O$xz9I=G8ZzCBk z*K1hG={O4;J7)=1fEksQ-JR|~FGF8C7NoZayXIc|s>wyS0DpgOZf<K!F<4txMzEV? zns{J&h}vhAcmK`Wzh2sloO8ZM)*<x3viQu6<dqaDqO!k6#ubq+E2}ZUYkrSSS(7C) zo{^NCau$bd0D2RQb2gQ+5O|sCtVi>$sJ}1tN=zuQ(@xHk0_%UMdd!87cjj&_*>AnA z8ze;>q5Ahwmv0x-5?`Pb64qP25=GH(KZ<%S7mh4=NK_11@-i~l?{T>KjrDbjWwLh6 zr$fQ}l$Y|}9q$gxm5iAgH5HCm6%0DX%~He_?!7AHFUm3^AN?)q>F2{I{35?ZE-kFg zbOx#=3XMh#)P?0wEex%F+FIKj*b;}}Pax3sjh~kC&yr620;^Vyv0Azo5`q^(Si7S- z8Q;&AmX;cM{7Dshox7CzuRB|fsK+{q;|UfBNotnv?$0=td}~7I7!EvspJcn<)1&_@ zLOU?|o89ueck)I?N3_EE3FFF_;BMQd%2h0UR)(ABxXLp%zKFHkA23d-w>LFi7F@og z8gBN(qvnE9_AY?<&|sM6i!#&o+<Xg-Eb{5oa+v>E_wJIEQP<)?2#n+E@Ijcu%KEU( zE-G?$a(XTFv|_SG=uykaJgFQjKB$Lq-1TVccJ<Q;Mg<LtE_jMP7_oT7@i6||4w^S2 zU_e|-#W5)uHJ+JUDeksli6=N%(f6%526TSI7#M*aME`G<RpWfPi`vJb1i60F(gZQ3 z4Tr4e+rpj;oD52^{k$J)FzbDHZ&%0w$LvPVt}8C$zh+5%5!Nm)E_ks4OT&PW(&iN} zHeVkDnqkXGTzq_m=hkLztt=-4FKMh^C<_<6q=X302?e>I?30+M*U${>WZoed8qx~A zQ(@|zRZeMBqpREg2B9HQwuka?H2{iXDH7N6Of3A4CldcIWqt6+VZ$i6!g-fM=BLvg zW#u45=F+v0l3!yuP;|A}Am=jot+GBb4NH?EM!EiJoy74*JA9VE463D(1W%8pZVUX+ z<>iFaL*N!Lu0WEmtdxB4v8hSdf)XB0qz9i|MAHd&po<ZFgStjW)AN5|sbdPrx1f1r zc~?BK(DPUo7em9Xmh+vht#F@9ZY+EOn8>igZ82O`vZ0#&VweGpz8LE;Ai=>vdzOYl zhJJ5AibguD0S#tu!x6e9f}vpX`BCRmExvm!o=z@zRuv8YG)j%#7YzZ~56Lua_Dc*l z!7SXjZy%&xY*o`LD&kv4j<ixxl9Qnhf$52*@WH&R7a@d*VpIT9N{dL0nH+rKRX6xE zeaC5LFs`%<$(t<g?5IxkASmo7%M$F*{=hJI+lM?^G=NzFO2;K-<z-#}e;(c+n06Iq zMe)T9694R3lj{-1OIVCLf+&%IQdH|ld$MvR`BLh~X5Bq9p~x{V)!ux3!&NAz>Bku( zvaL~j2g)m7m1NZckqm-Hii^ED*LV^sc5o8j8hvC_3d4hp>g41koEsPMf`XfVo2#|H zI-biP0S7g2c!NtEPnd}s<9Vl_AHt8T&SmD=wXr0GuOAObp+@7%$x!2r-5AW<EmB+S z0Q8l3Y={RdO59t8d#!)o41rph*#G?b0}(tuJ)PjD{P{jS0eS)zh6DZm2)BXds3-90 zSR+kbv;SgqR4T83M!v+{z)2}d$-Nm)PKwc&%`aRCXf|IPOG|(;2`oy45^_7NUDq^Z zt>W-cn#uLK-%<E{zizO-y<JO32a&~WovZ5VPliS&b)-8F1SZd!dI$1NyYg@TV;q<a z=~>BXBN848LBn$Rxy#DR?7tNQS%9`}C*Fe1gVL!Xe?66Cxt&)pM)CnN)M1U~V~sHh z&*^RPYn2zsJHCE}Qx-1vU-iOV*FysjDdHuVJE?Y2iJhMFoCrinH6Cjl8^ju6#6VfW z5=0~Yr&CGq_~+2sbLY<CZdM=@^ytwe#><sC((F9EykH%mK%qzLPuEP#$jqEqzS>VC z5?4`NJnA6oWaKqitMPO7v~At)354_Z2Iyz^=jAJLhH0j$T)BLiK$`guYnV&f2KcPt z{O~4V>eSKEA+SA$+k}M$<9qX$mFJ6pS9bCoPxJ(GGBa&#Z2|YBBrbWsA2MO4>NC7` zh*5#oY16fFOGN0y)vk#)tS4{Zz70bo!cs+ceXDr#{bUE>D?E3kmQx9vW*B`eiD9mi z51XYQ#maM(0ILU{QFsF*H3jsD9iMuiN(Xv|WGbuuR~{YQq9j6-*Jqay9WClI(dFNA zDy9j(XRPi?1dp{P+fWmjfO3XNgsJ5<IDu4ERmsfOy{|uOHV=|d#-``FsG}@*_yMKs zYk`4*7df4937_qe59r|a|8;Y0MvBaTGatFaf*m{I<TlMGRrJH&WYd|mZQ$P8TvOv3 zLd!TqBH&;gNChl!+_^N$$b*P=U;U5IpFf|md3S&c`Krd@Un_kKPR0f21D_cWiE}YL zT-^Wf;cB5QWa!hrfLu3OHeFeEdt66Zxe+|C-9FS5<Rw|olL(gv%<CknJ&lOa*4Dmp zSH7E!A?(&4cV9t{ZZ?IS87clVs5XGzjXCpf?QGwJ1Po3<ynJbCyR1JBkNMrNF@72q z1@kpnDG6e?UScnwoQw~A<E}yX(QR+psW47VPwVUMd-*gI*;(BlhrQ@(z|2!O_yBCs zY<KvnARppDLB_#Y2znK8sbXSE^!N3%Kcz%FQOH#L@kceDU_u>eS$gnV@rTimS<>el z0C^(Hb1U!LMAD@7&UEc3;3WF6-<LIUdJRmD((AOe5%d}G4JE0;BcZLMgBu3<u&gg+ zJ7wBD+u*mEj)5*ot$g;s!&vJ!n~zwvw`-E8*$6OQB@(>@`PJ6xHV?!Sej1l_%aQd< zTa)3;Kz<=T{b9?<d(%@0z&B5TP7|hb?Sq})Ptm`2Mm5HW2D6f1@xG3Lya-11wzeAQ z>n-JHT<@1rZ|~`0DZJ;Wc`o&VF!Q6;RhVQh(aTSL5aOIVLNnv0LF+(ZGcq)!Iyr@4 zm#62}$_nY@oBM9{cw#p^KttXeaF*qYO1u?1aG+Oe*tf-XYy^2_$NX%7`NMStjhNYz zk~bCiphWW|%kN)|gFUt_kzdQvj{&I$#Ohtbb662O!sJ4C2G!RJH9^p?FicX5?z3em zUw4<R!pT}%s-M~M+gY3`C?b2-j8^U8U^IqxUODHt-ZjQ}5XIx4A4~6{|Km<tw=-5I zLz)u7iZSOY%LdT5T1ITn#DZx=O@Yi_a@#8d!InUNP==wE0hR-LiKb?FQ-}b=*8%h0 zKBU+m@_5}!OObI-`%+Tn`)y}K@?<yL#ZURekG#$wyBCba7<w6Wgny#|@arpIx+MH^ z28S?sa}k%~Vq?LOb<8=V6h<K<JwW%}DkoQs`w9<ZsX<Pbk#A4|PS~5v$vC{|>d;k) zkMA1a^DEiUkFqs2;pL9%5a|zOAw$A3S*XV8=_rB5%sN_~FBTPfXBa0^^0*{6Zs-R2 z_f98GpX2J?4NNJ+Ly8>LF#;`TMtM1_3_VCDl9GlR8o(MXEfelDxTAl_)~Q;XD#OvS zwlY%=j}3jNJO|m=^qn7<e_)YOAVyCO6E*9*`7adW0jz9QyxlT|OZ3h(E=VIzEUzJ( za_q@PC8dVSmB>Wb0*!PWMzDD~)Jkn!pQohnG<&B&rQve)AyzU+77p6BB+ZrYk)Rg3 z>47F}L)79GjT(_CcKS46(|~u$b95J++TDJfQNhN>#^~On_Fowb$z;J@<5MRS8Nj-< z{BU_g*}U>!g|>;vI0f~EnUy~OcuGA`y+sNR%Fv6%UDMG~q{m_IIrmcu{eTu<-=x?h zlkM>jbVw-PfM+!h*B68u`_Tl{DQ(&62q?9#D{wNv5p1#9YZdT*T4lmr!tb=hzQ;(t z;Ic6LpAZzpiVqJO5=qXC*b;e8be&3RqwaP`0jj?q<s)HBVB6&<HeT~6^s5GH(i{EL zQemccZcn+bl(;Ydu<P7sN`TDaZ=#ZR%h7Sa@J-E}T|8ix=%;GnTE)YPbn8ZfEY~4B znJ_c>vES%C!V;FjOW25Kymh5~PhNc*SG8;s)ccoXs`~r7Qf~+o5uKnQhjs?zAa^*I zeI9!e;4$wVU)_6RlhnRTUidN}um?FgvX{)??s~%oc^Ar;fPk5kh4<^*^(pSDN;t6K zkhKIK98kDau4;sh>P;vAZ~@3mM|J)0klNDRjE!@=%5i$?m2I6FSQ<0D4F-aUHYy|w zAT0c0*~h@=W}p3+UHS<~?_GY|CMvL29s_jinKMXSf)e%dBaICGn4#U#5O@2-06vR` zpd$%PE;r2Woy1!Rpaqhx&<jcBKIcyR^pWnds0sf%)hnlb@Kr(hDKc{*=F8j#c>{sY zi9CF4kAX6W9X+?Rtg!Is@>*w+sj9Nqev19n6rmEOy{;iO#&80l&M-VA_pSyuEg&EO zlN!D<GBRFhSdpD%kT<9vW)jdSF|xnI|3V`jMz8qz=0|jLzg)<w4ok?=-)|h&H#A)O z<1P|sYhrQ~PVKE72`!k~4;~kqcrZpS6t8lSngVatd&d%kevCr8x)-(`a+a2sD?frx zy4}5t;s8)7PQVX?4oh=u!$QVSe57PlKP^32A_L%6h#4iEilW#uH7fa)u@I<$taMZe zp%rR$2@Rt0M*f14CRp*-!f2Z_CA0M%yIci?%VZAfXEX6t$;Fy{W~>=@f<4vhOl*fW z#06-)cLx0q0<%ZU&bM>vY;h~nLLcHko#eW60eiaDxRvK*o$BgiEyn+l-7<%Nx2cJ4 z)CTP}Zv|2yKR%<&z#KonKHq}-!W86ZP~ybZoO@JBn4VfdP|$8g?1kHIzuL&i$p8LR zgphjgh1MCCjm@=bd`#}+$CZ_prB+9e1=WY83Da@Jo}rU{r|@>yZ8i8#V1n32Zc49- zNHeOP`f0Dpc;KOY?cmJ%x<KHJDgI<Y{Py1nSVy7|#nMmq@UO_aL7ec&^b5yewibwL zbYwNM@9kB%Pjz2AXKVLg!~*u+s94y5Ev`DRK1aU@jt6hVM@&k<U~Xp@k%ow|c;0X_ z{Frn8Me9a<N9f+(r<m;il2+Ut$PEYzy}Nz#^s@z{XeSzwVvUokGZW@wP_nQ{gLS46 ev(#{$)ciR%Z)1NEp4oQ%qC1onROAt{@%bM-&rk;d diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile deleted file mode 100644 index bfdc1ef236..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/no-Makefile +++ /dev/null @@ -1,41 +0,0 @@ -TNL_VERSION=0.1 -TNL_INSTALL_DIR=${HOME}/local/lib -TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION} - -TARGET = hamiltonJacobiParallelSolver -#CONFIG_FILE = $(TARGET).cfg.desc -INSTALL_DIR = ${HOME}/local -CXX = g++ -CUDA_CXX = nvcc -OMP_FLAGS = -DHAVE_OPENMP -fopenmp -CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG -LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp - -SOURCES = main.cpp -HEADERS = -OBJECTS = main.o -DIST = $(SOURCES) Makefile - -all: $(TARGET) -clean: - rm -f $(OBJECTS) - rm -f $(TARGET)-conf.h - -dist: $(DIST) - tar zcvf $(TARGET).tgz $(DIST) - -install: $(TARGET) - cp $(TARGET) $(INSTALL_DIR)/bin - cp $(CONFIG_FILE) $(INSTALL_DIR)/share - -uninstall: $(TARGET) - rm -f $(INSTALL_DIR)/bin/$(TARGET) - rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share - -$(TARGET): $(OBJECTS) - $(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS) - -%.o: %.cpp $(HEADERS) - $(CXX) -c -o $@ $(CXX_FLAGS) $< - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h deleted file mode 100644 index c07ee95aa0..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/parallelMapConfig.h +++ /dev/null @@ -1,47 +0,0 @@ -/*************************************************************************** - parallelMapConfig.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ -#define HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ - -#include <config/tnlConfigDescription.h> - -template< typename ConfigTag > -class parallelMapConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" ); - config.addEntry < String > ( "scheme", "This defines scheme used for discretization.", "godunov" ); - config.addEntryEnum( "godunov" ); - config.addEntryEnum( "upwind" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < String > ( "map", "Gradient map for solver"); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 ); - config.addEntry < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 ); - config.addRequiredEntry < double > ( "stop-time", " Final time for solver"); - config.addRequiredEntry < double > ( "initial-tau", " initial tau for solver" ); - config.addEntry < double > ( "cfl-condition", " CFL condition", 0.0 ); - config.addEntry < int > ( "subgrid-size", "Subgrid size.", 16 ); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - } -}; - -#endif /* HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/run b/src/TNL/Legacy/hamilton-jacobi-parallel-map/run deleted file mode 100755 index 4844199627..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/run +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -dimensions=2 - -size=2 - -time=50 - -rm -r work_dir -mkdir work_dir -cp mapa_png.png work_dir/mapa_png.png -cd work_dir - -tnl-image-converter --image-format png\ - --input-images mapa_png.png - - -tnl-init --test-function sdf-para \ - --x-centre 0.5 \ - --y-centre 1.0 \ - --offset 0.05 \ - --output-file init.tnl \ - --final-time 0.0 \ - --snapshot-period 0.1 - -hamilton-jacobi-parallel-map-dbg --initial-condition init.tnl \ - --map mapa_png.tnl \ - --cfl-condition 50 \ - --mesh mesh.tnl \ - --initial-tau 1.0e-3 \ - --epsilon 4.0 \ - --delta 0.0 \ - --stop-time $time \ - --scheme godunov \ - --subgrid-size 8 \ - --dim $dimensions - - -#cp ../template.dat1 template.dat1 -#cp ../template.dat2 template.dat2 -#cp ../gplt2eps.py gplt2eps.py -cd .. - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py deleted file mode 100755 index f8cde3768e..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -import sys, string, math - -arguments = sys. argv[1:] -format = "txt" -output_file_name = "eoc-table.txt" -input_files = [] -verbose = 1 -size = 1.0 - -i = 0 -while i < len( arguments ): - if arguments[ i ] == "--format": - format = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--output-file": - output_file_name = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--verbose": - verbose = float( arguments[ i + 1 ] ) - i = i +2 - continue - if arguments[ i ] == "--size": - size = float( arguments[ i + 1 ] ) - i = i +2 - continue - input_files. append( arguments[ i ] ) - i = i + 1 - -if not verbose == 0: - print "Writing to " + output_file_name + " in " + format + "." - -h_list = [] -l1_norm_list = [] -l2_norm_list = [] -max_norm_list = [] -items = 0 - -for file_name in input_files: - if not verbose == 0: - print "Processing file " + file_name - file = open( file_name, "r" ) - - l1_max = 0.0 - l_max_max = 0.0 - file.readline(); - file.readline(); - for line in file. readlines(): - data = string. split( line ) - h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) ) - l1_norm_list. append( float( data[ 1 ] ) ) - l2_norm_list. append( float( data[ 2 ] ) ) - max_norm_list. append( float( data[ 3 ] ) ) - items = items + 1 - if not verbose == 0: - print line - file. close() - -h_width = 12 -err_width = 15 -file = open( output_file_name, "w" ) -if format == "latex": - file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" ) - file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" ) - file. write( " " + string. rjust( " ", h_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error.", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + - "\\\\ \\hline \\hline \n") -if format == "txt": - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - file. write( "| h | L1 Err. | L1 EOC. | L2 Err. | L2 EOC | MAX Err. | MAX EOC |\n" ) - file. write( "+==============+================+================+================+================+================+================+\n" ) - - -i = 0 -while i < items: - if i == 0: - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&"+ - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1; - continue - if h_list[ i ] == h_list[ i - 1 ]: - print "Unable to count eoc since h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" - file. write( " eoc error: h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" ) - else: - h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] ) - l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] ) - l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] ) - max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] ) - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1 - -if format == "latex": - file. write( "\\hline \n" ) - file. write( "\\end{tabular} \n" ) - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h deleted file mode 100644 index 400e163c9d..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h +++ /dev/null @@ -1,217 +0,0 @@ -/*************************************************************************** - tnlParallelMapSolver.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLPARALLELMAPSOLVER_H_ -#define TNLPARALLELMAPSOLVER_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <functions/tnlMeshFunction.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> -#include <limits.h> -#include <core/tnlDevice.h> - - -#include <ctime> - -#ifdef HAVE_CUDA -#include <core/tnlCuda.h> -#endif - - -template< int Dimension, - typename SchemeHost, - typename SchemeDevice, - typename Device, - typename RealType = double, - typename IndexType = int > -class tnlParallelMapSolver -{}; - -template<typename SchemeHost, typename SchemeDevice, typename Device> -class tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int > -{ -public: - - typedef SchemeDevice SchemeTypeDevice; - typedef SchemeHost SchemeTypeHost; - typedef Device DeviceType; - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType; -#ifdef HAVE_CUDA - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA; -#endif - tnlParallelMapSolver(); - bool init( const Config::ParameterContainer& parameters ); - void run(); - - void test(); - -/*private:*/ - - - void synchronize(); - - int getOwner( int i) const; - - int getSubgridValue( int i ) const; - - void setSubgridValue( int i, int value ); - - int getBoundaryCondition( int i ) const; - - void setBoundaryCondition( int i, int value ); - - void stretchGrid(); - - void contractGrid(); - - VectorType getSubgrid( const int i ) const; - - void insertSubgrid( VectorType u, const int i ); - - VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID,VectorType map); - - - tnlMeshFunction<MeshType> u0; - VectorType work_u, map_stretched, map; - IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; - MeshType mesh, subMesh; - -// tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - SchemeHost schemeHost; - SchemeDevice schemeDevice; - double delta, tau0, stopTime,cflCondition; - int gridRows, gridCols, gridLevels, currentStep, n; - - std::clock_t start; - double time_diff; - - - tnlDeviceEnum device; - - tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf() - { - return this; - }; - -#ifdef HAVE_CUDA - - tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; - - double* work_u_cuda; - double* map_stretched_cuda; - - int* subgridValues_cuda; - int* boundaryConditions_cuda; - int* unusedCell_cuda; - int* calculationsCount_cuda; - double* tmpw; - double* tmp_map; - - - int* runcuda; - int run_host; - - - __device__ void getSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void updateSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void insertSubgridCUDA2D( double u, const int i ); - - __device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID); - - __device__ int getOwnerCUDA2D( int i) const; - - __device__ int getSubgridValueCUDA2D( int i ) const; - - __device__ void setSubgridValueCUDA2D( int i, int value ); - - __device__ int getBoundaryConditionCUDA2D( int i ) const; - - __device__ void setBoundaryConditionCUDA2D( int i, int value ); - -#endif - -}; - - - - - - - - - - - - - - -#ifdef HAVE_CUDA -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void runCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void initRunCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void initCUDA2D( tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3, double* tmp_map_ptr); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - - - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -#endif - -#include "tnlParallelMapSolver2D_impl.h" -#endif /* TNLPARALLELMAPSOLVER_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h deleted file mode 100644 index e8cbc6fc16..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h +++ /dev/null @@ -1,1315 +0,0 @@ -/*************************************************************************** - tnlParallelMapSolver2D_impl.h - description - ------------------- - begin : Mar 22 , 2016 - copyright : (C) 2016 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLPARALLELMAPSOLVER2D_IMPL_H_ -#define TNLPARALLELMAPSOLVER2D_IMPL_H_ - - -#include "tnlParallelMapSolver.h" -#include <core/mfilename.h> - - - - -#define MAP_SOLVER_MAX_VALUE 3 - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver() -{ - this->device = TNL::Devices::HostDevice; /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice --- vypocet na CPU - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - run_host = 1; - } -#endif - -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test() -{ -/* - for(int i =0; i < this->subgridValues.getSize(); i++ ) - { - insertSubgrid(getSubgrid(i), i); - } -*/ -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> - -bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters ) -{ - cout << "Initializating solver..." <<std::endl; - const String& meshLocation = parameters.getParameter <String>("mesh"); - this->mesh.load( meshLocation ); - - this->n = parameters.getParameter <int>("subgrid-size"); - cout << "Setting N to " << this->n <<std::endl; - - this->subMesh.setDimensions( this->n, this->n ); - this->subMesh.setDomain( Containers::StaticVector<2,double>(0.0, 0.0), - Containers::StaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) ); - - this->subMesh.save("submesh.tnl"); - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - this->u0.load( initialCondition ); - - /* LOAD MAP */ - const String& mapFile = parameters.getParameter <String>("map"); - if(! this->map.load( mapFile )) - cout << "Failed to load map file : " << mapFile <<std::endl; - - - this->delta = parameters.getParameter <double>("delta"); - this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >(); - - cout << "Setting delta to " << this->delta <<std::endl; - - this->tau0 = parameters.getParameter <double>("initial-tau"); - cout << "Setting initial tau to " << this->tau0 <<std::endl; - this->stopTime = parameters.getParameter <double>("stop-time"); - - this->cflCondition = parameters.getParameter <double>("cfl-condition"); - this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >()); - cout << "Setting CFL to " << this->cflCondition <<std::endl; - - stretchGrid(); - this->stopTime /= (double)(this->gridCols); - this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); - cout << "Setting stopping time to " << this->stopTime <<std::endl; - - cout << "Initializating scheme..." <<std::endl; - if(!this->schemeHost.init(parameters)) - { - cerr << "SchemeHost failed to initialize." <<std::endl; - return false; - } - cout << "Scheme initialized." <<std::endl; - - test(); - - VectorType* tmp = new VectorType[subgridValues.getSize()]; - bool containsCurve = false; - -#ifdef HAVE_CUDA - - if(this->device == tnlCudaDevice) - { - cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); - - double** tmpdev = NULL; - cudaMalloc(&tmpdev, sizeof(double*)); - cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double)); - cudaMalloc(&(this->tmp_map), this->map_stretched.getSize()*sizeof(double)); - cudaMalloc(&(this->runcuda), sizeof(int)); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - int* tmpUC; - cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); - cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); - - initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC, tmp_map); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - double* tmpu = NULL; - cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); - cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaMemcpy((this->tmp_map), this->map_stretched.getData(), this->map_stretched.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - } -#endif - - if(this->device == TNL::Devices::HostDevice) - { - VectorType tmp_map; - tmp_map.setSize(this->n * this->n); - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - - if(! tmp[i].setSize(this->n * this->n)) - cout << "Could not allocate tmp["<< i <<"] array." <<std::endl; - tmp[i] = getSubgrid(i); - containsCurve = false; - - for(int j = 0; j < tmp[i].getSize(); j++) - { - if(tmp[i][0]*tmp[i][j] <= 0.0) - { - containsCurve = true; - j=tmp[i].getSize(); - } - - } - if(containsCurve) - { - for( int j = 0; j < tmp_map.getSize(); j++) - { - tmp_map[j] = this->map_stretched[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - //cout << "Computing initial SDF on subgrid " << i << "." <<std::endl; - tmp[i] = runSubgrid(0, tmp[i],i,tmp_map); - insertSubgrid(tmp[i], i); - setSubgridValue(i, 4); - //cout << "Computed initial SDF on subgrid " << i << "." <<std::endl; - } - containsCurve = false; - - } - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - } -#endif - - - this->currentStep = 1; - if(this->device == TNL::Devices::HostDevice) - synchronize(); -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - - synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - } - -#endif - cout << "Solver initialized." <<std::endl; - - return true; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run() -{ - if(this->device == TNL::Devices::HostDevice) - { - while ((this->boundaryConditions.max() > 0 )/* || !end*/) - { - -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - if(getSubgridValue(i) != INT_MAX) - { - VectorType tmp, tmp_map; - tmp.setSize(this->n * this->n); - tmp_map.setSize(this->n * this->n); - for( int j = 0; j < tmp_map.getSize(); j++) - { - tmp_map[j] = this->map_stretched[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - - if(getSubgridValue(i) == currentStep+4) - { - - if(getBoundaryCondition(i) & 1) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(2, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - else - { - - if(getBoundaryCondition(i) == 1) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) == 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(2, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) == 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) == 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - - if(getBoundaryCondition(i) & 3) - { - //cout << "3 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(3, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - } - if(getBoundaryCondition(i) & 5) - { - //cout << "5 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(5, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - } - if(getBoundaryCondition(i) & 10) - { - //cout << "10 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(10, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - } - if(getBoundaryCondition(i) & 12) - { - //cout << "12 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(12, tmp ,i,tmp_map); - insertSubgrid( tmp, i); - } - - - setBoundaryCondition(i, 0); - - setSubgridValue(i, getSubgridValue(i)-1); - - } - } - synchronize(); - } - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - bool end_cuda = false; - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - bool* tmpb; - cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - int i = 1; - time_diff = 0.0; - while (run_host || !end_cuda) - { - cout << "Computing at step "<< i++ <<std::endl; - if(run_host != 0 ) - end_cuda = true; - else - end_cuda = false; - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - start = std::clock(); - runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); - time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - //start = std::clock(); - synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); - } - cout << "Solving time was: " << time_diff <<std::endl; - - cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - - cudaDeviceSynchronize(); - } -#endif - contractGrid(); - this->u0.save("u-00001.tnl"); - cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <<std::endl; - cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <<std::endl; - cout << "Solver finished" <<std::endl; - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - cudaFree(this->runcuda); - cudaFree(this->tmpw); - cudaFree(this->tmp_map); - cudaFree(this->cudaSolver); - } -#endif - -} - -//north - 1, east - 2, west - 4, south - 8 -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - cout << "Synchronizig..." <<std::endl; - int tmp1, tmp2; - int grid1, grid2; - -// if(this->currentStep & 1) -// { - for(int j = 0; j < this->gridRows - 1; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i; - tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 8) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 1) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1); - } - } - } - -// } -// else -// { - for(int i = 1; i < this->gridCols; i++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - tmp1 = this->gridCols*this->n*j + i*this->n - 1; - tmp2 = this->gridCols*this->n*j + i*this->n ; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 2) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); - } - } - } -// } - - - this->currentStep++; - int stepValue = this->currentStep + 4; - for (int i = 0; i < this->subgridValues.getSize(); i++) - { - if( getSubgridValue(i) == -INT_MAX ) - setSubgridValue(i, stepValue); - } - - cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl; - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const -{ - - return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const -{ - return this->subgridValues[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) -{ - this->subgridValues[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const -{ - return this->boundaryConditions[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) -{ - this->boundaryConditions[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() -{ - cout << "Stretching grid..." <<std::endl; - - - this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); - this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); - - - cout << "Setting gridCols to " << this->gridCols << "." <<std::endl; - cout << "Setting gridRows to " << this->gridRows << "." <<std::endl; - - this->subgridValues.setSize(this->gridCols*this->gridRows); - this->subgridValues.setValue(0); - this->boundaryConditions.setSize(this->gridCols*this->gridRows); - this->boundaryConditions.setValue(0); - this->calculationsCount.setSize(this->gridCols*this->gridRows); - this->calculationsCount.setValue(0); - - for(int i = 0; i < this->subgridValues.getSize(); i++ ) - { - this->subgridValues[i] = INT_MAX; - this->boundaryConditions[i] = 0; - } - - int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; - - if(!this->work_u.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched grid." <<std::endl; - if(!this->map_stretched.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched map." <<std::endl; - if(!this->unusedCell.setSize(stretchedSize)) - cerr << "Could not allocate memory for supporting stretched grid." <<std::endl; - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <<std::endl; - - for(int i = 0; i < stretchedSize; i++) - { - this->unusedCell[i] = 1; - int diff =(this->n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if(i%(this->n*this->gridCols) - idealStretch >= 0) - { - k+= i%(this->n*this->gridCols) - idealStretch +1 ; - } - - if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) - { - k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; - } - - - if(fabs(this->u0[i-k]) < mesh.template getSpaceStepsProducts< 1, 0 >()+mesh.template getSpaceStepsProducts< 0, 1 >() ) - this->work_u[i] = this->u0[i-k]; - else - this->work_u[i] = sign(this->u0[i-k])*MAP_SOLVER_MAX_VALUE; - - this->map_stretched[i] = this->map[i-k]; - } - - - cout << "Grid stretched." <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() -{ - cout << "Contracting grid..." <<std::endl; - int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; - - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <<std::endl; - - for(int i = 0; i < stretchedSize; i++) - { - int diff =(this->n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0)) - { - this->u0[i-k] = this->work_u[i]; - } - - } - - cout << "Grid contracted" <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const -{ - VectorType u; - u.setSize(this->n*this->n); - - for( int j = 0; j < u.getSize(); j++) - { - u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - return u; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) -{ - - for( int j = 0; j < this->n*this->n; j++) - { - int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); - if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) ) - { - this->work_u[index] = u[j]; - this->unusedCell[index] = 0; - } - } -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID,VectorType map) -{ - - VectorType fu; - - fu.setLike(u); - fu.setValue( 0.0 ); - - - - bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - int centerGID = (this->n*(subGridID / this->gridRows)+ (this->n >> 1))*(this->n*this->gridCols) + this->n*(subGridID % this->gridRows) + (this->n >> 1); - if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0) - tmp = true; - } - - - double value = sign(u[0]) * u.absMax(); - - if(tmp) - {} - - - //north - 1, east - 2, west - 4, south - 8 - else if(boundaryCondition == 4) - { - for(int i = 0; i < this->n; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 2) - { - for(int i = 0; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 1) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n - 1; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 8) - { - for(int j = 0; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } - - - - double time = 0.0; - double currentTau = this->tau0; - double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - double maxResidue( 1.0 ); - tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - for( int i = 0; i < u.getSize(); i ++ ) - { - if(map[i] == 0.0) - { - u[i] = /*sign(u[l])**/MAP_SOLVER_MAX_VALUE; - } - } - - while( time < finalTime ) - { - /**** - * Compute the RHS - */ - - for( int i = 0; i < fu.getSize(); i ++ ) - { - Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x())); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - if(map[i] != 0.0) - fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities,map); - } - maxResidue = fu. absMax(); - - - if(maxResidue != 0.0) - currentTau = fabs(this -> cflCondition / maxResidue); - - - if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) - { - currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - - - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - - - for( int i = 0; i < fu.getSize(); i ++ ) - { - if(map[i] != 0.0) - u[ i ] += currentTau * fu[ i ]; - } - time += currentTau; - - } - return u; -} - - -#ifdef HAVE_CUDA - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - - *a = caller->work_u_cuda[th]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - - if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) - { - caller->work_u_cuda[index] = *a; - caller->unusedCell_cuda[index] = 0; - - } - - *a = caller->work_u_cuda[index]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i ) -{ - int index = (blockIdx.y)*this->n*this->n*this->gridCols - + (blockIdx.x)*this->n - + threadIdx.y*this->n*this->gridCols - + threadIdx.x; - - if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) - { - this->work_u_cuda[index] = u; - this->unusedCell_cuda[index] = 0; - - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID) -{ - - __shared__ int tmp; - __shared__ double value; - volatile double* sharedTau = &u[blockDim.x*blockDim.y]; - double* map_local = &u[2*blockDim.x*blockDim.y]; - - int i = threadIdx.x; - int j = threadIdx.y; - int l = threadIdx.y * blockDim.x + threadIdx.x; - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - - /* LOAD MAP */ - map_local[l]=this->map_stretched_cuda[gid]; - if(map_local[l] != 0.0) - map_local[l] = 1.0/map_local[l]; - /* LOADED */ - - bool computeFU = !((i == 0 && (boundaryCondition & 4)) or - (i == blockDim.x - 1 && (boundaryCondition & 2)) or - (j == 0 && (boundaryCondition & 8)) or - (j == blockDim.y - 1 && (boundaryCondition & 1))); - - if(l == 0) - { - tmp = 0; - int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1); - if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0) - tmp = 1; - } - __syncthreads(); - - - if(tmp !=1) - { - if(computeFU) - { - if(boundaryCondition == 4) - u[l] = u[threadIdx.y * blockDim.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x); - else if(boundaryCondition == 2) - u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x); - else if(boundaryCondition == 8) - u[l] = u[threadIdx.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y); - else if(boundaryCondition == 1) - u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y); - } - } - - double time = 0.0; - __shared__ double currentTau; - double cfl = this->cflCondition; - double fu = 0.0; - - double finalTime = this->stopTime; - if(boundaryCondition == 0) - finalTime*=2.0; - __syncthreads(); - - tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Entity.setCoordinates(Containers::StaticVector<2,int>(i,j)); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - - - if(map_local[l] == 0.0) - { - u[l] = /*sign(u[l])**/MAP_SOLVER_MAX_VALUE; - computeFU = false; - } - __syncthreads(); - - - while( time < finalTime ) - { - sharedTau[l] = finalTime; - - if(computeFU) - { - fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j), u, time, boundaryCondition, neighborEntities, map_local); - sharedTau[l]=abs(cfl/fu); - } - - - - if(l == 0) - { - if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - else if(l == blockDim.x*blockDim.y - 1) - if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - - - if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); - __syncthreads(); - if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); - __syncthreads(); - if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); - if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); - if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); - if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); - if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); - if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); - __syncthreads(); - - u[l] += currentTau * fu; - time += currentTau; - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const -{ - - return ((i / (this->gridCols*this->n*this->n))*this->gridCols - + (i % (this->gridCols*this->n))/this->n); -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const -{ - return this->subgridValues_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value) -{ - this->subgridValues_cuda[i] = value; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const -{ - return this->boundaryConditions_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value) -{ - this->boundaryConditions_cuda[i] = value; -} - - - -//north - 1, east - 2, west - 4, south - 8 - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void synchronizeCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - - __shared__ int boundary[4]; // north,east,west,south - __shared__ int subgridValue; - __shared__ int newSubgridValue; - - - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - double u = cudaSolver->work_u_cuda[gid]; - double u_cmp; - int subgridValue_cmp=INT_MAX; - int boundary_index=0; - - - if(threadIdx.x+threadIdx.y == 0) - { - subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x); - boundary[0] = 0; - boundary[1] = 0; - boundary[2] = 0; - boundary[3] = 0; - newSubgridValue = 0; - } - __syncthreads(); - - - - if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) - { - if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1); - boundary_index = 2; - } - - if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1); - boundary_index = 1; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x); - boundary_index = 3; - } - if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x); - boundary_index = 0; - } - - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - } - } - __threadfence(); - __syncthreads(); - - if(threadIdx.x+threadIdx.y == 0) - { - if(subgridValue == INT_MAX && newSubgridValue !=0) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX); - - cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, boundary[0] + - 2 * boundary[1] + - 4 * boundary[2] + - 8 * boundary[3]); - - - if(blockIdx.x+blockIdx.y ==0) - { - cudaSolver->currentStep += 1; - *(cudaSolver->runcuda) = 0; - } - } - -} - - - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) -{ - - - int stepValue = cudaSolver->currentStep + 4; - if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); - - atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); -} - - - - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr) -{ - - - cudaSolver->work_u_cuda = ptr; - cudaSolver->map_stretched_cuda = tmp_map_ptr; - cudaSolver->unusedCell_cuda = ptr3; - cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->runcuda = ptr2; - *(cudaSolver->runcuda) = 1; - -/* CHANGED !!!!!! from 1 to 0*/ cudaSolver->currentStep = 0; - - printf("GPU memory allocated.\n"); - - for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++) - { - cudaSolver->subgridValues_cuda[i] = INT_MAX; - cudaSolver->boundaryConditions_cuda[i] = 0; - } - - printf("GPU memory initialized.\n"); -} - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) - -{ - extern __shared__ double u[]; - - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - - __shared__ int containsCurve; - if(l == 0) - containsCurve = 0; - - - caller->getSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - - if(u[0] * u[l] <= 0.0) - atomicMax( &containsCurve, 1); - - __syncthreads(); - if(containsCurve == 1) - { - caller->runSubgridCUDA2D(0,u,i); - caller->insertSubgridCUDA2D(u[l],i); - __syncthreads(); - if(l == 0) - caller->setSubgridValueCUDA2D(i, 4); - } - - -} - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) -{ - extern __shared__ double u[]; - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - int bound = caller->getBoundaryConditionCUDA2D(i); - - if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0) - { - caller->getSubgridCUDA2D(i,caller, &u[l]); - - - if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4) - { - if(bound & 1) - { - caller->runSubgridCUDA2D(1,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 2) - { - caller->runSubgridCUDA2D(2,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 4) - { - caller->runSubgridCUDA2D(4,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 8) - { - caller->runSubgridCUDA2D(8,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - } - else - { - - if(bound == 1) - { - caller->runSubgridCUDA2D(1,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 2) - { - caller->runSubgridCUDA2D(2,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 4) - { - caller->runSubgridCUDA2D(4,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 8) - { - caller->runSubgridCUDA2D(8,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - } - - if(bound & 3) - { - caller->runSubgridCUDA2D(3,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 5) - { - caller->runSubgridCUDA2D(5,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 10) - { - caller->runSubgridCUDA2D(10,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 12) - { - caller->runSubgridCUDA2D(12,u,i); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - if(l==0) - { - caller->setBoundaryConditionCUDA2D(i, 0); - caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 ); - } - - - } - - - -} - -#endif /*HAVE_CUDA*/ - -#endif /* TNLPARALLELMAPSOLVER2D_IMPL_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt b/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt deleted file mode 100644 index f6a00127c7..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set( tnl_hamilton_jacobi_parallel_SOURCES -# MainBuildConfig.h -# tnlParallelEikonalSolver2D_impl.h -# tnlParallelEikonalSolver3D_impl.h -# tnlParallelEikonalSolver.h -# parallelEikonalConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(hamilton-jacobi-parallel main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (hamilton-jacobi-parallel tnl ) - - -INSTALL( TARGETS hamilton-jacobi-parallel - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/hamilton-jacobi-parallel ) diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h deleted file mode 100644 index ed3d686eb9..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include <solvers/tnlBuildConfigTags.h> - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" <<std::endl; } -}; - -/**** - * Turn off support for float and long double. - */ -template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp deleted file mode 100644 index b13498e173..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Jul 8 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu b/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu deleted file mode 100644 index 7101976712..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cu - description - ------------------- - begin : Mar 30 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/main.h b/src/TNL/Legacy/hamilton-jacobi-parallel/main.h deleted file mode 100644 index dbaebdcebd..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/main.h +++ /dev/null @@ -1,142 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Mar 30 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "tnlParallelEikonalSolver.h" -#include "parallelEikonalConfig.h" -#include "MainBuildConfig.h" -#include <solvers/tnlBuildConfigTags.h> -#include <operators/hamilton-jacobi/godunov-eikonal/parallelGodunovEikonal.h> -#include <mesh/tnlGrid.h> -#include <core/tnlDevice.h> -#include <time.h> -#include <ctime> - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - parallelEikonalConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - //if (parameters.GetParameter <String>("scheme") == "godunov") - //{ - tnlDeviceEnum device; - device = TNL::Devices::HostDevice; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost; - /*#ifdef HAVE_CUDA - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; - #endif - #ifndef HAVE_CUDA*/ - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice; - /*#endif*/ - - if(device==TNL::Devices::HostDevice) - { - typedef TNL::Devices::Host Device; - - - tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver loop..." <<std::endl; - solver.run(); - } - else if(device==tnlCudaDevice ) - { - typedef tnlCuda Device; - //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType; - - tnlParallelEikonalSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver loop..." <<std::endl; - solver.run(); - } - // } - } - else if(dim == 3) - { - - typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeHost; - /*#ifdef HAVE_CUDA - typedef parallelGodunovEikonalScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; - #endif - #ifndef HAVE_CUDA*/ - typedef parallelGodunovEikonalScheme< tnlGrid<3,double,TNL::Devices::Host, int>, double, int > SchemeTypeDevice; - /*#endif*/ - - if(device==TNL::Devices::HostDevice) - { - typedef TNL::Devices::Host Device; - - - tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver loop..." <<std::endl; - solver.run(); - } - else if(device==tnlCudaDevice ) - { - typedef tnlCuda Device; - //typedef parallelGodunovEikonalScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType; - - tnlParallelEikonalSolver<3,SchemeTypeHost,SchemeTypeDevice, Device> solver; - if(!solver.init(parameters)) - { - std::cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver loop..." <<std::endl; - solver.run(); - } - // } - } - - time(&stop); - std::cout <<std::endl; - std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl; - return EXIT_SUCCESS; -} - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile b/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile deleted file mode 100644 index bfdc1ef236..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/no-Makefile +++ /dev/null @@ -1,41 +0,0 @@ -TNL_VERSION=0.1 -TNL_INSTALL_DIR=${HOME}/local/lib -TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION} - -TARGET = hamiltonJacobiParallelSolver -#CONFIG_FILE = $(TARGET).cfg.desc -INSTALL_DIR = ${HOME}/local -CXX = g++ -CUDA_CXX = nvcc -OMP_FLAGS = -DHAVE_OPENMP -fopenmp -CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG -LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp - -SOURCES = main.cpp -HEADERS = -OBJECTS = main.o -DIST = $(SOURCES) Makefile - -all: $(TARGET) -clean: - rm -f $(OBJECTS) - rm -f $(TARGET)-conf.h - -dist: $(DIST) - tar zcvf $(TARGET).tgz $(DIST) - -install: $(TARGET) - cp $(TARGET) $(INSTALL_DIR)/bin - cp $(CONFIG_FILE) $(INSTALL_DIR)/share - -uninstall: $(TARGET) - rm -f $(INSTALL_DIR)/bin/$(TARGET) - rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share - -$(TARGET): $(OBJECTS) - $(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS) - -%.o: %.cpp $(HEADERS) - $(CXX) -c -o $@ $(CXX_FLAGS) $< - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h b/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h deleted file mode 100644 index c27f5ebb39..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/parallelEikonalConfig.h +++ /dev/null @@ -1,46 +0,0 @@ -/*************************************************************************** - parallelEikonalConfig.h - description - ------------------- - begin : Oct 5, 2014 - copyright : (C) 2014 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ -#define HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ - -#include <config/tnlConfigDescription.h> - -template< typename ConfigTag > -class parallelEikonalConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Parallel Eikonal solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" ); - config.addEntry < String > ( "scheme", "This defines scheme used for discretization.", "godunov" ); - config.addEntryEnum( "godunov" ); - config.addEntryEnum( "upwind" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 ); - config.addEntry < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 ); - config.addRequiredEntry < double > ( "stop-time", " Final time for solver"); - config.addRequiredEntry < double > ( "initial-tau", " initial tau for solver" ); - config.addEntry < double > ( "cfl-condition", " CFL condition", 0.0 ); - config.addEntry < int > ( "subgrid-size", "Subgrid size.", 16 ); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - } -}; - -#endif /* HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/run b/src/TNL/Legacy/hamilton-jacobi-parallel/run deleted file mode 100755 index 3aece294a9..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/run +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -#GRID_SIZES="0897" -GRID_SIZES="0008 0015 0029 0057 0113 0225 0449" -#GRID_SIZES="1793" - -dimensions=2 - -size=2 - -time=3 - -for grid_size in $GRID_SIZES; - -do - - rm -r grid-${grid_size} - mkdir grid-${grid_size} - cd grid-${grid_size} - - tnl-grid-setup --dimensions $dimensions \ - --origin-x -1.0 \ - --origin-y -1.0 \ - --origin-z -1.0 \ - --proportions-x $size \ - --proportions-y $size \ - --proportions-z $size \ - --size-x ${grid_size} \ - --size-y ${grid_size} \ - --size-z ${grid_size} - - tnl-init --test-function sdf-para \ - --offset 0.25 \ - --output-file init.tnl \ - --final-time 0.0 \ - --snapshot-period 0.1 \ - - - tnl-init --test-function sdf-para-sdf \ - --offset 0.25 \ - --output-file sdf.tnl \ - --final-time 0.0 \ - --snapshot-period 0.1 - - hamilton-jacobi-parallel --initial-condition init.tnl \ - --cfl-condition 1.0e-1 \ - --mesh mesh.tnl \ - --initial-tau 1.0e-3 \ - --epsilon 1.0 \ - --delta 0.0 \ - --stop-time $time \ - --scheme godunov \ - --subgrid-size 8 - - tnl-diff --mesh mesh.tnl --mode sequence --input-files sdf.tnl u-00001.tnl --write-difference yes --output-file ../${grid_size}.diff - - cd .. - -done - - -./tnl-err2eoc-2.py --format txt --size $size *.diff - - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py b/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py deleted file mode 100755 index f8cde3768e..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnl-err2eoc-2.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -import sys, string, math - -arguments = sys. argv[1:] -format = "txt" -output_file_name = "eoc-table.txt" -input_files = [] -verbose = 1 -size = 1.0 - -i = 0 -while i < len( arguments ): - if arguments[ i ] == "--format": - format = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--output-file": - output_file_name = arguments[ i + 1 ] - i = i + 2 - continue - if arguments[ i ] == "--verbose": - verbose = float( arguments[ i + 1 ] ) - i = i +2 - continue - if arguments[ i ] == "--size": - size = float( arguments[ i + 1 ] ) - i = i +2 - continue - input_files. append( arguments[ i ] ) - i = i + 1 - -if not verbose == 0: - print "Writing to " + output_file_name + " in " + format + "." - -h_list = [] -l1_norm_list = [] -l2_norm_list = [] -max_norm_list = [] -items = 0 - -for file_name in input_files: - if not verbose == 0: - print "Processing file " + file_name - file = open( file_name, "r" ) - - l1_max = 0.0 - l_max_max = 0.0 - file.readline(); - file.readline(); - for line in file. readlines(): - data = string. split( line ) - h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) ) - l1_norm_list. append( float( data[ 1 ] ) ) - l2_norm_list. append( float( data[ 2 ] ) ) - max_norm_list. append( float( data[ 3 ] ) ) - items = items + 1 - if not verbose == 0: - print line - file. close() - -h_width = 12 -err_width = 15 -file = open( output_file_name, "w" ) -if format == "latex": - file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" ) - file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) - file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" ) - file. write( " " + string. rjust( " ", h_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + "&" + - string. rjust( "Error.", err_width ) + "&" + - string. rjust( "{\\bf EOC}", err_width ) + - "\\\\ \\hline \\hline \n") -if format == "txt": - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - file. write( "| h | L1 Err. | L1 EOC. | L2 Err. | L2 EOC | MAX Err. | MAX EOC |\n" ) - file. write( "+==============+================+================+================+================+================+================+\n" ) - - -i = 0 -while i < items: - if i == 0: - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&"+ - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( " ", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( " ", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1; - continue - if h_list[ i ] == h_list[ i - 1 ]: - print "Unable to count eoc since h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" - file. write( " eoc error: h[ " + \ - str( i ) + " ] = h[ " + str( i - 1 ) + \ - " ] = " + str( h_list[ i ] ) + ". \n" ) - else: - h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] ) - l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] ) - l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] ) - max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] ) - if format == "latex": - file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + - string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" ) - if format == "txt": - file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + - string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" + - string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + - string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" ) - file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) - i = i + 1 - -if format == "latex": - file. write( "\\hline \n" ) - file. write( "\\end{tabular} \n" ) - diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h deleted file mode 100644 index 19cdd94935..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h +++ /dev/null @@ -1,366 +0,0 @@ -/*************************************************************************** - tnlParallelEikonalSolver.h - description - ------------------- - begin : Nov 28 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLPARALLELEIKONALSOLVER_H_ -#define TNLPARALLELEIKONALSOLVER_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <functions/tnlMeshFunction.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> -#include <limits.h> -#include <core/tnlDevice.h> - #include <omp.h> - - -#include <ctime> - -#ifdef HAVE_CUDA -#include <core/tnlCuda.h> -#endif - - -template< int Dimension, - typename SchemeHost, - typename SchemeDevice, - typename Device, - typename RealType = double, - typename IndexType = int > -class tnlParallelEikonalSolver -{}; - -template<typename SchemeHost, typename SchemeDevice, typename Device> -class tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int > -{ -public: - - typedef SchemeDevice SchemeTypeDevice; - typedef SchemeHost SchemeTypeHost; - typedef Device DeviceType; - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshType; -#ifdef HAVE_CUDA - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA; - typedef tnlGrid< 2, double, TNL::Devices::Host, int > MeshTypeCUDA; -#endif - tnlParallelEikonalSolver(); - bool init( const Config::ParameterContainer& parameters ); - void run(); - - void test(); - -/*private:*/ - - - void synchronize(); - - int getOwner( int i) const; - - int getSubgridValue( int i ) const; - - void setSubgridValue( int i, int value ); - - int getBoundaryCondition( int i ) const; - - void setBoundaryCondition( int i, int value ); - - void stretchGrid(); - - void contractGrid(); - - VectorType getSubgrid( const int i ) const; - - void insertSubgrid( VectorType u, const int i ); - - VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID); - - - tnlMeshFunction<MeshType> u0; - VectorType work_u; - IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; - MeshType mesh, subMesh; - -// tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - SchemeHost schemeHost; - SchemeDevice schemeDevice; - double delta, tau0, stopTime,cflCondition; - int gridRows, gridCols, gridLevels, currentStep, n; - - std::clock_t start; - double time_diff; - - - tnlDeviceEnum device; - - tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf() - { - return this; - }; - -#ifdef HAVE_CUDA - - tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; - - double* work_u_cuda; - - int* subgridValues_cuda; - int*boundaryConditions_cuda; - int* unusedCell_cuda; - int* calculationsCount_cuda; - double* tmpw; - //MeshTypeCUDA mesh_cuda, subMesh_cuda; - //SchemeDevice scheme_cuda; - //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda; - //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda; - - int* runcuda; - int run_host; - - - __device__ void getSubgridCUDA2D( const int i, tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void updateSubgridCUDA2D( const int i, tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void insertSubgridCUDA2D( double u, const int i ); - - __device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID); - - /*__global__ void runCUDA();*/ - - //__device__ void synchronizeCUDA(); - - __device__ int getOwnerCUDA2D( int i) const; - - __device__ int getSubgridValueCUDA2D( int i ) const; - - __device__ void setSubgridValueCUDA2D( int i, int value ); - - __device__ int getBoundaryConditionCUDA2D( int i ) const; - - __device__ void setBoundaryConditionCUDA2D( int i, int value ); - - //__device__ bool initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - - /*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, TNL::Devices::Host, int >* caller);*/ - -#endif - -}; - - - - - - - - template<typename SchemeHost, typename SchemeDevice, typename Device> - class tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int > - { - public: - - typedef SchemeDevice SchemeTypeDevice; - typedef SchemeHost SchemeTypeHost; - typedef Device DeviceType; - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorType; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorType; - typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshType; - #ifdef HAVE_CUDA - typedef TNL::Containers::Vector< double, TNL::Devices::Host, int > VectorTypeCUDA; - typedef TNL::Containers::Vector< int, TNL::Devices::Host, int > IntVectorTypeCUDA; - typedef tnlGrid< 3, double, TNL::Devices::Host, int > MeshTypeCUDA; - #endif - tnlParallelEikonalSolver(); - bool init( const Config::ParameterContainer& parameters ); - void run(); - - void test(); - - /*private:*/ - - - void synchronize(); - - int getOwner( int i) const; - - int getSubgridValue( int i ) const; - - void setSubgridValue( int i, int value ); - - int getBoundaryCondition( int i ) const; - - void setBoundaryCondition( int i, int value ); - - void stretchGrid(); - - void contractGrid(); - - VectorType getSubgrid( const int i ) const; - - void insertSubgrid( VectorType u, const int i ); - - VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID); - - - tnlMeshFunction<MeshType> u0; - VectorType work_u; - IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; - MeshType mesh, subMesh; - SchemeHost schemeHost; - SchemeDevice schemeDevice; - double delta, tau0, stopTime,cflCondition; - int gridRows, gridCols, gridLevels, currentStep, n; - - std::clock_t start; - double time_diff; - - - tnlDeviceEnum device; - - tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf() - { - return this; - }; - -#ifdef HAVE_CUDA - - tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; - - double* work_u_cuda; - - int* subgridValues_cuda; - int*boundaryConditions_cuda; - int* unusedCell_cuda; - int* calculationsCount_cuda; - double* tmpw; - //MeshTypeCUDA mesh_cuda, subMesh_cuda; - //SchemeDevice scheme_cuda; - //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda; - //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda; - - int* runcuda; - int run_host; - - - __device__ void getSubgridCUDA3D( const int i, tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void updateSubgridCUDA3D( const int i, tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); - - __device__ void insertSubgridCUDA3D( double u, const int i ); - - __device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID); - - /*__global__ void runCUDA();*/ - - //__device__ void synchronizeCUDA(); - - __device__ int getOwnerCUDA3D( int i) const; - - __device__ int getSubgridValueCUDA3D( int i ) const; - - __device__ void setSubgridValueCUDA3D( int i, int value ); - - __device__ int getBoundaryConditionCUDA3D( int i ) const; - - __device__ void setBoundaryConditionCUDA3D( int i, int value ); - - //__device__ bool initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - - /*__global__ void initRunCUDA(tnlParallelEikonalSolver<Scheme, double, TNL::Devices::Host, int >* caller);*/ - -#endif - -}; - - - - - - -#ifdef HAVE_CUDA -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void runCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void initRunCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void initCUDA2D( tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void synchronizeCUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void synchronize2CUDA2D(tnlParallelEikonalSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - - - - - - - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void runCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void initRunCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void initCUDA3D( tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void synchronizeCUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ void synchronize2CUDA3D(tnlParallelEikonalSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); -#endif - - -#ifdef HAVE_CUDA -__cuda_callable__ -double fabsMin( double x, double y) -{ - double fx = fabs(x); - - if(Min(fx,fabs(y)) == fx) - return x; - else - return y; -} - -__cuda_callable__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - -#endif - -#include "tnlParallelEikonalSolver2D_impl.h" -#include "tnlParallelEikonalSolver3D_impl.h" -#endif /* TNLPARALLELEIKONALSOLVER_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h deleted file mode 100644 index 76cf49bc8a..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver2D_impl.h +++ /dev/null @@ -1,1928 +0,0 @@ -/*************************************************************************** - tnlParallelEikonalSolver2D_impl.h - description - ------------------- - begin : Nov 28 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLPARALLELEIKONALSOLVER2D_IMPL_H_ -#define TNLPARALLELEIKONALSOLVER2D_IMPL_H_ - - -#include "tnlParallelEikonalSolver.h" -#include <core/mfilename.h> - -template< typename SchemeHost, typename SchemeDevice, typename Device> -tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver() -{ - cout << "a" <<std::endl; - this->device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice --- vypocet na CPU - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - run_host = 1; - } -#endif - - cout << "b" <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test() -{ -/* - for(int i =0; i < this->subgridValues.getSize(); i++ ) - { - insertSubgrid(getSubgrid(i), i); - } -*/ -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> - -bool tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters ) -{ - cout << "Initializating solver..." <<std::endl; - const String& meshLocation = parameters.getParameter <String>("mesh"); - this->mesh.load( meshLocation ); - - this->n = parameters.getParameter <int>("subgrid-size"); - cout << "Setting N to " << this->n <<std::endl; - - this->subMesh.setDimensions( this->n, this->n ); - this->subMesh.setDomain( Containers::StaticVector<2,double>(0.0, 0.0), - Containers::StaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) ); - - this->subMesh.save("submesh.tnl"); - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) <<std::endl; - - this->delta = parameters.getParameter <double>("delta"); - this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >(); - - cout << "Setting delta to " << this->delta <<std::endl; - - this->tau0 = parameters.getParameter <double>("initial-tau"); - cout << "Setting initial tau to " << this->tau0 <<std::endl; - this->stopTime = parameters.getParameter <double>("stop-time"); - - this->cflCondition = parameters.getParameter <double>("cfl-condition"); - this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >()); - cout << "Setting CFL to " << this->cflCondition <<std::endl; - - stretchGrid(); - this->stopTime /= (double)(this->gridCols); - this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); - cout << "Setting stopping time to " << this->stopTime <<std::endl; - //this->stopTime = 1.5*((double)(this->n))*parameters.getParameter <double>("stop-time")*this->mesh.template getSpaceStepsProducts< 1, 0 >(); - //cout << "Setting stopping time to " << this->stopTime <<std::endl; - - cout << "Initializating scheme..." <<std::endl; - if(!this->schemeHost.init(parameters)) - { - cerr << "SchemeHost failed to initialize." <<std::endl; - return false; - } - cout << "Scheme initialized." <<std::endl; - - test(); - - VectorType* tmp = new VectorType[subgridValues.getSize()]; - bool containsCurve = false; - -#ifdef HAVE_CUDA - - if(this->device == tnlCudaDevice) - { - /*cout << "Testing... " <<std::endl; - if(this->device == tnlCudaDevice) - { - if( !initCUDA2D(parameters, gridRows, gridCols) ) - return false; - }*/ - //cout << "s" <<std::endl; - cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >)); - //cout << "s" <<std::endl; - cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); - //cout << "s" <<std::endl; - double** tmpdev = NULL; - cudaMalloc(&tmpdev, sizeof(double*)); - //double* tmpw; - cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double)); - cudaMalloc(&(this->runcuda), sizeof(int)); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - int* tmpUC; - cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); - cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); - - initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s " <<std::endl; - //cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double)); - double* tmpu = NULL; - - cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); - //printf("%p %p \n",tmpu,tmpw); - cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s "<<std::endl; - - } -#endif - - if(this->device == TNL::Devices::HostDevice) - { - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - - if(! tmp[i].setSize(this->n * this->n)) - cout << "Could not allocate tmp["<< i <<"] array." <<std::endl; - tmp[i] = getSubgrid(i); - containsCurve = false; - - for(int j = 0; j < tmp[i].getSize(); j++) - { - if(tmp[i][0]*tmp[i][j] <= 0.0) - { - containsCurve = true; - j=tmp[i].getSize(); - } - - } - if(containsCurve) - { - //cout << "Computing initial SDF on subgrid " << i << "." <<std::endl; - tmp[i] = runSubgrid(0, tmp[i],i); - insertSubgrid(tmp[i], i); - setSubgridValue(i, 4); - //cout << "Computed initial SDF on subgrid " << i << "." <<std::endl; - } - containsCurve = false; - - } - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { -// cout << "pre 1 kernel" <<std::endl; - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); -// cout << "post 1 kernel" <<std::endl; - - } -#endif - - - this->currentStep = 1; - if(this->device == TNL::Devices::HostDevice) - synchronize(); -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); - //cout << test[0] <<" " << test[1] <<" " << test[2] <<" " << test[3] <<std::endl; - //cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] <<" " << test[1] << " " <<test[2] << " " <<test[3] <<std::endl; - - TNL_CHECK_CUDA_DEVICE; - - synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << test[0] << " " <<test[1] <<" " << test[2] << " " <<test[3] <<std::endl; - //cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //TNL_CHECK_CUDA_DEVICE; - //cout << this->tmpw << " " << test[0] << " " <<test[1] << " " <<test[2] <<" " << test[3] <<std::endl; - //free(test); - - } - -#endif - cout << "Solver initialized." <<std::endl; - - return true; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run() -{ - if(this->device == TNL::Devices::HostDevice) - { - - bool end = false; - while ((this->boundaryConditions.max() > 0 ) || !end) - { - if(this->boundaryConditions.max() == 0 ) - end=true; - else - end=false; -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - if(getSubgridValue(i) != INT_MAX) - { - VectorType tmp; - tmp.setSize(this->n * this->n); - //cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) <<std::endl; - - if(getSubgridValue(i) == currentStep+4) - { - - if(getBoundaryCondition(i) & 1) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i); - insertSubgrid( tmp, 2); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - - if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 1)//) - /* &&(!(getBoundaryCondition(i) & 5) && !(getBoundaryCondition(i) & 10)) */) - { - //cout << "3 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i); - insertSubgrid( tmp, 3); - } - if( ((getBoundaryCondition(i) & 4) )|| (getBoundaryCondition(i) & 1)//) - /* &&(!(getBoundaryCondition(i) & 3) && !(getBoundaryCondition(i) & 12)) */) - { - //cout << "5 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(5, tmp ,i); - insertSubgrid( tmp, i); - } - if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 8)//) - /* &&(!(getBoundaryCondition(i) & 12) && !(getBoundaryCondition(i) & 3))*/ ) - { - //cout << "10 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(10, tmp ,i); - insertSubgrid( tmp, i); - } - if( ((getBoundaryCondition(i) & 4) )|| (getBoundaryCondition(i) & 8)//) - /*&&(!(getBoundaryCondition(i) & 10) && !(getBoundaryCondition(i) & 5)) */) - { - //cout << "12 @ " << getBoundaryCondition(i) <<std::endl; - tmp = getSubgrid(i); - tmp = runSubgrid(12, tmp ,i); - insertSubgrid( tmp, i); - } - - - /*if(getBoundaryCondition(i)) - { - insertSubgrid( runSubgrid(15, getSubgrid(i),i), i); - }*/ - - setBoundaryCondition(i, 0); - - setSubgridValue(i, getSubgridValue(i)-1); - - } - } - synchronize(); - } - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - //cout << "fn" <<std::endl; - bool end_cuda = false; - dim3 threadsPerBlock(this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cudaMalloc(&runcuda,sizeof(bool)); - //cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice); - //cout << "fn" <<std::endl; - bool* tmpb; - //cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - //cudaDeviceSynchronize(); - //TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "fn" <<std::endl; - int i = 1; - time_diff = 0.0; - while (run_host || !end_cuda) - { - cout << "Computing at step "<< i++ <<std::endl; - if(run_host != 0 ) - end_cuda = true; - else - end_cuda = false; - //cout << "a" <<std::endl; - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - start = std::clock(); - runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - //cout << "a" <<std::endl; - cudaDeviceSynchronize(); - time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - //start = std::clock(); - synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - - //cout << "a" <<std::endl; - //run_host = false; - //cout << "in kernel loop" << run_host <<std::endl; - //cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); - //cout << "in kernel loop" << run_host <<std::endl; - } - cout << "Solving time was: " << time_diff <<std::endl; - //cout << "b" <<std::endl; - - //double* tmpu; - //cudaMemcpy(tmpu, &(cudaSolver->work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice); - //cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->work_u.getData()[0] <<std::endl; - - //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); - //cout << test[0] << test[1] << test[2] << test[3] <<std::endl; - cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] << test[1] << test[2] << test[3] <<std::endl; - //free(test); - - cudaDeviceSynchronize(); - } -#endif - contractGrid(); - this->u0.save("u-00001.tnl"); - cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <<std::endl; - cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <<std::endl; - cout << "Solver finished" <<std::endl; - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - cudaFree(this->runcuda); - cudaFree(this->tmpw); - cudaFree(this->cudaSolver); - } -#endif - -} - -//north - 1, east - 2, west - 4, south - 8 -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - cout << "Synchronizig..." <<std::endl; - int tmp1, tmp2; - int grid1, grid2; - - if(this->currentStep & 1) - { - for(int j = 0; j < this->gridRows - 1; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i; - tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 8) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 1) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1); - } - } - } - - } - else - { - for(int i = 1; i < this->gridCols; i++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - tmp1 = this->gridCols*this->n*j + i*this->n - 1; - tmp2 = this->gridCols*this->n*j + i*this->n ; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j" << i << "," << j <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 2) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); - } - } - } - } - - - this->currentStep++; - int stepValue = this->currentStep + 4; - for (int i = 0; i < this->subgridValues.getSize(); i++) - { - if( getSubgridValue(i) == -INT_MAX ) - setSubgridValue(i, stepValue); - } - - cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl; - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const -{ - - return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const -{ - return this->subgridValues[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) -{ - this->subgridValues[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const -{ - return this->boundaryConditions[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) -{ - this->boundaryConditions[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() -{ - cout << "Stretching grid..." <<std::endl; - - - this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); - this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); - - //this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ; - //this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ; - - cout << "Setting gridCols to " << this->gridCols << "." <<std::endl; - cout << "Setting gridRows to " << this->gridRows << "." <<std::endl; - - this->subgridValues.setSize(this->gridCols*this->gridRows); - this->subgridValues.setValue(0); - this->boundaryConditions.setSize(this->gridCols*this->gridRows); - this->boundaryConditions.setValue(0); - this->calculationsCount.setSize(this->gridCols*this->gridRows); - this->calculationsCount.setValue(0); - - for(int i = 0; i < this->subgridValues.getSize(); i++ ) - { - this->subgridValues[i] = INT_MAX; - this->boundaryConditions[i] = 0; - } - - int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; - - if(!this->work_u.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched grid." <<std::endl; - if(!this->unusedCell.setSize(stretchedSize)) - cerr << "Could not allocate memory for supporting stretched grid." <<std::endl; - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <<std::endl; - - for(int i = 0; i < stretchedSize; i++) - { - this->unusedCell[i] = 1; - int diff =(this->n*this->gridCols) - idealStretch ; - //cout << "diff = " << diff <<endl; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if(i%(this->n*this->gridCols) - idealStretch >= 0) - { - //cout << i%(this->n*this->gridCols) - idealStretch +1 <<std::endl; - k+= i%(this->n*this->gridCols) - idealStretch +1 ; - } - - if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) - { - //cout << i/(this->n*this->gridCols) - idealStretch + 1 <<std::endl; - k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; - } - - //cout << "i = " << i << " : i-k = " << i-k <<std::endl; - /*int j=(i % (this->n*this->gridCols)) - ( (this->mesh.getDimensions().x() - this->n)/(this->n - 1) + this->mesh.getDimensions().x() - 1) - + (this->n*this->gridCols - this->mesh.getDimensions().x())*(i/(this->n*this->n*this->gridCols)) ; - - if(j > 0) - k += j; - - int l = i-k - (this->u0.getSize() - 1); - int m = (l % this->mesh.getDimensions().x()); - - if(l>0) - k+= l + ( (l / this->mesh.getDimensions().x()) + 1 )*this->mesh.getDimensions().x() - (l % this->mesh.getDimensions().x());*/ - - this->work_u[i] = this->u0[i-k]; - //cout << (i-k) <<endl; - } - - - cout << "Grid stretched." <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() -{ - cout << "Contracting grid..." <<std::endl; - int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; - - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <<std::endl; - - for(int i = 0; i < stretchedSize; i++) - { - int diff =(this->n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0)) - { - //cout << i <<" : " <<i-k<<std::endl; - this->u0[i-k] = this->work_u[i]; - } - - } - - cout << "Grid contracted" <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const -{ - VectorType u; - u.setSize(this->n*this->n); - - for( int j = 0; j < u.getSize(); j++) - { - u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols - + (i % this->gridCols) * this->n - + (j/this->n) * this->n*this->gridCols - + (j % this->n) ]; - } - return u; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) -{ - - for( int j = 0; j < this->n*this->n; j++) - { - int index = (i / this->gridCols)*this->n*this->n*this->gridCols - + (i % this->gridCols)*this->n - + (j/this->n)*this->n*this->gridCols - + (j % this->n); - //OMP LOCK index - if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) ) - { - this->work_u[index] = u[j]; - this->unusedCell[index] = 0; - } - //OMP UNLOCK index - } -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID) -{ - - VectorType fu; - - fu.setLike(u); - fu.setValue( 0.0 ); - -/* - * Insert Euler-Solver Here - */ - - /**/ - - /*for(int i = 0; i < u.getSize(); i++) - { - int x = this->subMesh.getCellCoordinates(i).x(); - int y = this->subMesh.getCellCoordinates(i).y(); - - if(x == 0 && (boundaryCondition & 4) && y ==0) - { - if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = 0; y = 0" <<std::endl; - u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); - } - } - else if(x == 0 && (boundaryCondition & 4) && y == subMesh.getDimensions().y() - 1) - { - if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = 0; y = n" <<std::endl; - u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); - } - } - - - else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y ==0) - { - if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = n; y = 0" <<std::endl; - u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); - } - } - else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y == subMesh.getDimensions().y() - 1) - { - if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) - { - //cout << "x = n; y = n" <<std::endl; - u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); - } - } - - - else if(y == 0 && (boundaryCondition & 8) && x ==0) - { - if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) - { - //cout << "y = 0; x = 0" <<std::endl; - u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - } - else if(y == 0 && (boundaryCondition & 8) && x == subMesh.getDimensions().x() - 1) - { - if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) - { - //cout << "y = 0; x = n" <<std::endl; - u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - } - - - else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x ==0) - { - if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) { - //cout << "y = n; x = 0" <<std::endl; - u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - } - else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x == subMesh.getDimensions().x() - 1) - { - if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) - { - //cout << "y = n; x = n" <<std::endl; - u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - } - }*/ - - /**/ - - -/* bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - } - - - if(tmp) - {} - else if(boundaryCondition == 4) - { - int i; - for(i = 0; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) - { - int j; - for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - else if(boundaryCondition == 8) - { - int i; - for(i = 0; i < subMesh.getDimensions().x() - 1; i=subMesh.getCellXSuccessor(i)) - { - int j; - for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - - } - else if(boundaryCondition == 2) - { - int i; - for(i = subMesh.getDimensions().x() - 1; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) - { - int j; - for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - else if(boundaryCondition == 1) - { - int i; - for(i = (subMesh.getDimensions().y() - 1)*subMesh.getDimensions().x(); i < u.getSize() - 1; i=subMesh.getCellXSuccessor(i)) - { - int j; - for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } - int j; - for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) - { - u[j] = u[i]; - } - u[j] = u[i]; - } -*/ - /**/ - - - - bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - int centerGID = (this->n*(subGridID / this->gridRows)+ (this->n >> 1))*(this->n*this->gridCols) + this->n*(subGridID % this->gridRows) + (this->n >> 1); - if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0) - tmp = true; - } - //if(this->currentStep + 3 < getSubgridValue(subGridID)) - //tmp = true; - - - double value = sign(u[0]) * u.absMax(); - - if(tmp) - {} - - - //north - 1, east - 2, west - 4, south - 8 - else if(boundaryCondition == 4) - { - for(int i = 0; i < this->n; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 2) - { - for(int i = 0; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 1) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n - 1; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 8) - { - for(int j = 0; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } - -/* - - else if(boundaryCondition == 5) - { - for(int i = 0; i < this->n - 1; i++) - for(int j = 1;j < this->n; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 10) - { - for(int i = 1; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 3) - { - for(int j = 0; j < this->n - 1; j++) - for(int i = 0;i < this->n - 1; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 12) - { - for(int j = 1; j < this->n; j++) - for(int i = 1;i < this->n; i++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[i*this->n + j] = value;// u[j]; - } -*/ - - - /**/ - - /*if (u.max() > 0.0) - this->stopTime *=(double) this->gridCols;*/ - - - double time = 0.0; - double currentTau = this->tau0; - double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - double maxResidue( 1.0 ); - //double lastResidue( 10000.0 ); - tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0 >()*/) - { - /**** - * Compute the RHS - */ - - for( int i = 0; i < fu.getSize(); i ++ ) - { - Entity.setCoordinates(Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x())); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - fu[ i ] = schemeHost.getValue( this->subMesh, i, Containers::StaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighborEntities); - } - maxResidue = fu. absMax(); - - - if( this -> cflCondition * maxResidue != 0.0) - currentTau = this -> cflCondition / maxResidue; - - /* if (maxResidue < 0.05) - std::cout << "Max < 0.05" <<std::endl;*/ - if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) - { - //cout << currentTau << " >= " << 2.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >() <<std::endl; - currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - /*if(maxResidue > lastResidue) - currentTau *=(1.0/10.0);*/ - - - if( time + currentTau > finalTime ) currentTau = finalTime - time; -// for( int i = 0; i < fu.getSize(); i ++ ) -// { -// //cout << "Too big RHS! i = " << i << ", fu = " << fu[i] << ", u = " << u[i] <<std::endl; -// if((u[i]+currentTau * fu[ i ])*u[i] < 0.0 && fu[i] != 0.0 && u[i] != 0.0 ) -// currentTau = fabs(u[i]/(2.0*fu[i])); -// -// } - - - for( int i = 0; i < fu.getSize(); i ++ ) - { - double add = u[i] + currentTau * fu[ i ]; - //if( fabs(u[i]) < fabs(add) or (this->subgridValues[subGridID] == this->currentStep +4) ) - u[ i ] = add; - } - time += currentTau; - - //cout << '\r' << flush; - //cout << maxResidue << " " << currentTau << " @ " << time << flush; - //lastResidue = maxResidue; - } - //cout << "Time: " << time << ", Res: " << maxResidue <<endl; - /*if (u.max() > 0.0) - this->stopTime /=(double) this->gridCols;*/ - - VectorType solution; - solution.setLike(u); - for( int i = 0; i < u.getSize(); i ++ ) - { - solution[i]=u[i]; - } - return solution; -} - - -#ifdef HAVE_CUDA - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - //int j = threadIdx.x + threadIdx.y * blockDim.x; - int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - //printf("i= %d,j= %d,th= %d\n",i,j,th); - *a = caller->work_u_cuda[th]; - //printf("Hi %f \n", *a); - //return ret; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ -// int j = threadIdx.x + threadIdx.y * blockDim.x; - int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols - + (blockIdx.x) * caller->n - + threadIdx.y * caller->n*caller->gridCols - + threadIdx.x; - - if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) - { - caller->work_u_cuda[index] = *a; - caller->unusedCell_cuda[index] = 0; - - } - - *a = caller->work_u_cuda[index]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i ) -{ - - -// int j = threadIdx.x + threadIdx.y * blockDim.x; - //printf("j = %d, u = %f\n", j,u); - - int index = (blockIdx.y)*this->n*this->n*this->gridCols - + (blockIdx.x)*this->n - + threadIdx.y*this->n*this->gridCols - + threadIdx.x; - - //printf("i= %d,j= %d,index= %d\n",i,j,index); - if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) - { - this->work_u_cuda[index] = u; - this->unusedCell_cuda[index] = 0; - - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID) -{ - - __shared__ int tmp; - __shared__ double value; - //double tmpRes = 0.0; - volatile double* sharedTau = &u[blockDim.x*blockDim.y]; - volatile double* absVal = &u[2*blockDim.x*blockDim.y]; - int i = threadIdx.x; - int j = threadIdx.y; - int l = threadIdx.y * blockDim.x + threadIdx.x; - bool computeFU = !((i == 0 && (boundaryCondition & 4)) or - (i == blockDim.x - 1 && (boundaryCondition & 2)) or - (j == 0 && (boundaryCondition & 8)) or - (j == blockDim.y - 1 && (boundaryCondition & 1))); - - if(l == 0) - { - tmp = 0; - int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1); - if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0) - tmp = 1; - } - __syncthreads(); - - /*if(!tmp && (u[0]*u[l] <= 0.0)) - atomicMax( &tmp, 1);*/ - - __syncthreads(); - if(tmp !=1) - { -// if(computeFU) -// absVal[l]=0.0; -// else -// absVal[l] = fabs(u[l]); -// -// __syncthreads(); -// -// if((blockDim.x == 16) && (l < 128)) absVal[l] = Max(absVal[l],absVal[l+128]); -// __syncthreads(); -// if((blockDim.x == 16) && (l < 64)) absVal[l] = Max(absVal[l],absVal[l+64]); -// __syncthreads(); -// if(l < 32) absVal[l] = Max(absVal[l],absVal[l+32]); -// if(l < 16) absVal[l] = Max(absVal[l],absVal[l+16]); -// if(l < 8) absVal[l] = Max(absVal[l],absVal[l+8]); -// if(l < 4) absVal[l] = Max(absVal[l],absVal[l+4]); -// if(l < 2) absVal[l] = Max(absVal[l],absVal[l+2]); -// if(l < 1) value = sign(u[0])*Max(absVal[l],absVal[l+1]); -// __syncthreads(); -// -// if(computeFU) -// u[l] = value; - if(computeFU) - { - if(boundaryCondition == 4) - u[l] = u[threadIdx.y * blockDim.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n); - else if(boundaryCondition == 2) - u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); - else if(boundaryCondition == 8) - u[l] = u[threadIdx.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n); - else if(boundaryCondition == 1) - u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); - } - } - - double time = 0.0; - __shared__ double currentTau; - double cfl = this->cflCondition; - double fu = 0.0; -// if(threadIdx.x * threadIdx.y == 0) -// { -// currentTau = finalTime; -// } - double finalTime = this->stopTime; - __syncthreads(); -// if( time + currentTau > finalTime ) currentTau = finalTime - time; - - tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Entity.setCoordinates(Containers::StaticVector<2,int>(i,j)); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - - - while( time < finalTime ) - { - if(computeFU) - fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighborEntities); - - sharedTau[l]=abs(cfl/fu); - - if(l == 0) - { - if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); - } - else if(l == blockDim.x*blockDim.y - 1) - if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - - -// if( (sign(u[l]+sharedTau[l]*fu) != sign(u[l])) && fu != 0.0 && fu != -0.0) -// { -// printf("orig: %10f", sharedTau[l]); -// sharedTau[l]=abs(u[l]/(1.1*fu)) ; -// printf(" new: %10f\n", sharedTau[l]); -// } - - - - if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); - __syncthreads(); - if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); - __syncthreads(); - if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); - if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); - if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); - if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); - if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); - if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); - __syncthreads(); - - u[l] += currentTau * fu; - time += currentTau; - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const -{ - - return ((i / (this->gridCols*this->n*this->n))*this->gridCols - + (i % (this->gridCols*this->n))/this->n); -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const -{ - return this->subgridValues_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value) -{ - this->subgridValues_cuda[i] = value; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const -{ - return this->boundaryConditions_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value) -{ - this->boundaryConditions_cuda[i] = value; -} - - - -//north - 1, east - 2, west - 4, south - 8 - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - - __shared__ int boundary[4]; // north,east,west,south - __shared__ int subgridValue; - __shared__ int newSubgridValue; - - - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; - double u = cudaSolver->work_u_cuda[gid]; - double u_cmp; - int subgridValue_cmp=INT_MAX; - int boundary_index=0; - - - if(threadIdx.x+threadIdx.y == 0) - { - subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x); - boundary[0] = 0; - boundary[1] = 0; - boundary[2] = 0; - boundary[3] = 0; - newSubgridValue = 0; - //printf("%d %d\n", blockDim.x, gridDim.x); - } - __syncthreads(); - - - - if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) - { - if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1); - boundary_index = 2; - } - - if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1); - boundary_index = 1; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x); - boundary_index = 3; - } - if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x); - boundary_index = 0; - } - -// __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - } - } - __threadfence(); - __syncthreads(); - - if(threadIdx.x+threadIdx.y == 0) - { - if(subgridValue == INT_MAX && newSubgridValue !=0) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX); - - cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, boundary[0] + - 2 * boundary[1] + - 4 * boundary[2] + - 8 * boundary[3]); - - - if(blockIdx.x+blockIdx.y ==0) - { - cudaSolver->currentStep = cudaSolver->currentStep + 1; - *(cudaSolver->runcuda) = 0; - } -// -// int stepValue = cudaSolver->currentStep + 4; -// if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) -// cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); -// -// atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); - } - - - /* - //printf("I am not an empty kernel!\n"); - //cout << "Synchronizig..." <<std::endl; - int tmp1, tmp2; - int grid1, grid2; - - if(cudaSolver->currentStep & 1) - { - //printf("I am not an empty kernel! 1\n"); - for(int j = 0; j < cudaSolver->gridRows - 1; j++) - { - //printf("I am not an empty kernel! 3\n"); - for (int i = 0; i < cudaSolver->gridCols*cudaSolver->n; i++) - { - tmp1 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n-1)+j*cudaSolver->n) + i; - tmp2 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n)+j*cudaSolver->n) + i; - grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); - grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); - - if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; - cudaSolver->unusedCell_cuda[tmp2] = 0; - if(grid2 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 8) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+8); - } - else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; - cudaSolver->unusedCell_cuda[tmp1] = 0; - if(grid1 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 1) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+1); - } - } - } - - } - else - { - //printf("I am not an empty kernel! 2\n"); - for(int i = 1; i < cudaSolver->gridCols; i++) - { - //printf("I am not an empty kernel! 4\n"); - for (int j = 0; j < cudaSolver->gridRows*cudaSolver->n; j++) - { - - tmp1 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n - 1; - tmp2 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n ; - grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); - grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); - - if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; - cudaSolver->unusedCell_cuda[tmp2] = 0; - if(grid2 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 4) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+4); - } - else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); - cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; - cudaSolver->unusedCell_cuda[tmp1] = 0; - if(grid1 == INT_MAX) - { - cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); - } - if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 2) ) - cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+2); - } - } - } - } - //printf("I am not an empty kernel! 5 cudaSolver->currentStep : %d \n", cudaSolver->currentStep); - - cudaSolver->currentStep = cudaSolver->currentStep + 1; - int stepValue = cudaSolver->currentStep + 4; - for (int i = 0; i < cudaSolver->gridRows * cudaSolver->gridCols; i++) - { - if( cudaSolver->getSubgridValueCUDA2D(i) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(i, stepValue); - } - - int maxi = 0; - for(int q=0; q < cudaSolver->gridRows*cudaSolver->gridCols;q++) - { - //printf("%d : %d\n", q, cudaSolver->boundaryConditions_cuda[q]); - maxi=Max(maxi,cudaSolver->getBoundaryConditionCUDA2D(q)); - } - //printf("I am not an empty kernel! %d\n", maxi); - *(cudaSolver->runcuda) = (maxi > 0); - //printf("I am not an empty kernel! 7 %d\n", cudaSolver->boundaryConditions_cuda[0]); - //cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl; -*/ -} - - - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void synchronize2CUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) -{ -// if(blockIdx.x+blockIdx.y ==0) -// { -// cudaSolver->currentStep = cudaSolver->currentStep + 1; -// *(cudaSolver->runcuda) = 0; -// } - - int stepValue = cudaSolver->currentStep + 4; - if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); - - atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); -} - - - - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA2D( tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3) -{ - //cout << "Initializating solver..." <<std::endl; - //const String& meshLocation = parameters.getParameter <String>("mesh"); - //this->mesh_cuda.load( meshLocation ); - - //this->n_cuda = parameters.getParameter <int>("subgrid-size"); - //cout << "Setting N << this->n_cuda <<std::endl; - - //this->subMesh_cuda.setDimensions( this->n_cuda, this->n_cuda ); - //this->subMesh_cuda.setDomain( Containers::StaticVector<2,double>(0.0, 0.0), - //Containers::StaticVector<2,double>(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n_cuda), this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n_cuda)) ); - - //this->subMesh_cuda.save("submesh.tnl"); - -// const String& initialCondition = parameters.getParameter <String>("initial-condition"); -// this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) <<std::endl; - - //this->delta_cuda = parameters.getParameter <double>("delta"); - //this->delta_cuda *= this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >(); - - //cout << "Setting delta to " << this->delta <<std::endl; - - //this->tau0_cuda = parameters.getParameter <double>("initial-tau"); - //cout << "Setting initial tau to " << this->tau0_cuda <<std::endl; - //this->stopTime_cuda = parameters.getParameter <double>("stop-time"); - - //this->cflCondition_cuda = parameters.getParameter <double>("cfl-condition"); - //this -> cflCondition_cuda *= sqrt(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()); - //cout << "Setting CFL to " << this->cflCondition <<std::endl; -//// -//// - -// this->gridRows_cuda = gridRows; -// this->gridCols_cuda = gridCols; - - cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double)); - cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int)); - cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); - cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool)); - *(cudaSolver->runcuda) = 1; - cudaSolver->currentStep = 1; - //cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost); - //ptr = cudaSolver->work_u_cuda; - printf("GPU memory allocated.\n"); - - for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++) - { - cudaSolver->subgridValues_cuda[i] = INT_MAX; - cudaSolver->boundaryConditions_cuda[i] = 0; - } - - /*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++) - { - printf("%d\n",j); - cudaSolver->unusedCell_cuda[ j] = 1; - }*/ - printf("GPU memory initialized.\n"); - - - //cudaSolver->work_u_cuda[50] = 32.153438; -//// -//// - //stretchGrid(); - //this->stopTime_cuda /= (double)(this->gridCols_cuda); - //this->stopTime_cuda *= (1.0+1.0/((double)(this->n_cuda) - 1.0)); - //cout << "Setting stopping time to " << this->stopTime <<std::endl; - //this->stopTime_cuda = 1.5*((double)(this->n_cuda))*parameters.getParameter <double>("stop-time")*this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >(); - //cout << "Setting stopping time to " << this->stopTime <<std::endl; - - //cout << "Initializating scheme..." <<std::endl; - //if(!this->schemeDevice.init(parameters)) -// { - //cerr << "Scheme failed to initialize." <<std::endl; -// return false; -// } - //cout << "Scheme initialized." <<std::endl; - - //test(); - -// this->currentStep_cuda = 1; - //return true; -} - - - - -//extern __shared__ double array[]; -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initRunCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) - -{ - - - extern __shared__ double u[]; - //printf("%p\n",caller->work_u_cuda); - - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - - __shared__ int containsCurve; - if(l == 0) - containsCurve = 0; - - //double a; - caller->getSubgridCUDA2D(i,caller, &u[l]); - //printf("%f %f\n",a , u[l]); - //u[l] = a; - //printf("Hi %f \n", u[l]); - __syncthreads(); - //printf("hurewrwr %f \n", u[l]); - if(u[0] * u[l] <= 0.0) - { - //printf("contains %d \n",i); - atomicMax( &containsCurve, 1); - } - - __syncthreads(); - //printf("hu"); - //printf("%d : %f\n", l, u[l]); - if(containsCurve == 1) - { - //printf("have curve \n"); - caller->runSubgridCUDA2D(0,u,i); - //printf("%d : %f\n", l, u[l]); - __syncthreads(); - caller->insertSubgridCUDA2D(u[l],i); - __syncthreads(); - if(l == 0) - caller->setSubgridValueCUDA2D(i, 4); - } - - -} - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void /*tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/runCUDA2D(tnlParallelEikonalSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) -{ - extern __shared__ double u[]; - int i = blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.y * blockDim.x + threadIdx.x; - int bound = caller->getBoundaryConditionCUDA2D(i); - - if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0) - { - caller->getSubgridCUDA2D(i,caller, &u[l]); - - //if(l == 0) - //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA2D(i)); - if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4) - { - if(bound & 1) - { - caller->runSubgridCUDA2D(1,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 2 ) - { - caller->runSubgridCUDA2D(2,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 4) - { - caller->runSubgridCUDA2D(4,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 8) - { - caller->runSubgridCUDA2D(8,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - if( ((bound & 3 ))) - { - caller->runSubgridCUDA2D(3,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 5 ))) - { - caller->runSubgridCUDA2D(5,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 10 ))) - { - caller->runSubgridCUDA2D(10,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 12 )) - { - caller->runSubgridCUDA2D(12,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - } - - - else - { - - - - - - - - - - if( ((bound == 2))) - { - caller->runSubgridCUDA2D(2,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 1) )) - { - caller->runSubgridCUDA2D(1,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 8) )) - { - caller->runSubgridCUDA2D(8,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound == 4)) - { - caller->runSubgridCUDA2D(4,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - - - - - - if( ((bound & 3) )) - { - caller->runSubgridCUDA2D(3,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 5) )) - { - caller->runSubgridCUDA2D(5,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 10) )) - { - caller->runSubgridCUDA2D(10,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 12) ) - { - caller->runSubgridCUDA2D(12,u,i); - //__syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - } - - - - - - - - - - - - - } - /*if( bound ) - { - caller->runSubgridCUDA2D(15,u,i); - __syncthreads(); - //caller->insertSubgridCUDA2D(u[l],i); - //__syncthreads(); - //caller->getSubgridCUDA2D(i,caller, &u[l]); - caller->updateSubgridCUDA2D(i,caller, &u[l]); - __syncthreads(); - }*/ - - if(l==0) - { - caller->setBoundaryConditionCUDA2D(i, 0); - caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 ); - } - - - } - - - -} - -#endif /*HAVE_CUDA*/ - -#endif /* TNLPARALLELEIKONALSOLVER2D_IMPL_H_ */ diff --git a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h b/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h deleted file mode 100644 index dc3fd54679..0000000000 --- a/src/TNL/Legacy/hamilton-jacobi-parallel/tnlParallelEikonalSolver3D_impl.h +++ /dev/null @@ -1,1706 +0,0 @@ -/*************************************************************************** - tnlParallelEikonalSolver2D_impl.h - description - ------------------- - begin : Nov 28 , 2014 - copyright : (C) 2014 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLPARALLELEIKONALSOLVER3D_IMPL_H_ -#define TNLPARALLELEIKONALSOLVER3D_IMPL_H_ - - -#include "tnlParallelEikonalSolver.h" -#include <core/mfilename.h> - -template< typename SchemeHost, typename SchemeDevice, typename Device> -tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelEikonalSolver() -{ - cout << "a" <<std::endl; - this->device = TNL::Devices::HostDevice; /////////////// tnlCuda Device --- vypocet na GPU, TNL::Devices::HostDevice --- vypocet na CPU - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - run_host = 1; - } -#endif - - cout << "b" <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::test() -{ -/* - for(int i =0; i < this->subgridValues.getSize(); i++ ) - { - insertSubgrid(getSubgrid(i), i); - } -*/ -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> - -bool tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::init( const Config::ParameterContainer& parameters ) -{ - cout << "Initializating solver..." <<std::endl; - const String& meshLocation = parameters.getParameter <String>("mesh"); - this->mesh.load( meshLocation ); - - this->n = parameters.getParameter <int>("subgrid-size"); - cout << "Setting N to " << this->n <<std::endl; - - this->subMesh.setDimensions( this->n, this->n, this->n ); - this->subMesh.setDomain( Containers::StaticVector<3,double>(0.0, 0.0, 0.0), - Containers::StaticVector<3,double>(mesh.template getSpaceStepsProducts< 1, 0, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1, 0 >()*(double)(this->n),mesh.template getSpaceStepsProducts< 0, 0, 1 >()*(double)(this->n)) ); - - this->subMesh.save("submesh.tnl"); - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - this->u0.load( initialCondition ); - - //cout << this->mesh.getCellCenter(0) <<std::endl; - - this->delta = parameters.getParameter <double>("delta"); - this->delta *= mesh.template getSpaceStepsProducts< 1, 0, 0 >()*mesh.template getSpaceStepsProducts< 0, 1, 0 >(); - - cout << "Setting delta to " << this->delta <<std::endl; - - this->tau0 = parameters.getParameter <double>("initial-tau"); - cout << "Setting initial tau to " << this->tau0 <<std::endl; - this->stopTime = parameters.getParameter <double>("stop-time"); - - this->cflCondition = parameters.getParameter <double>("cfl-condition"); - this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0, 0 >()*mesh.template getSpaceStepsProducts< 0, 1, 0 >()); - cout << "Setting CFL to " << this->cflCondition <<std::endl; - - stretchGrid(); - this->stopTime /= (double)(this->gridCols); - this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); - cout << "Setting stopping time to " << this->stopTime <<std::endl; - //this->stopTime = 1.5*((double)(this->n))*parameters.getParameter <double>("stop-time")*mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - //cout << "Setting stopping time to " << this->stopTime <<std::endl; - - cout << "Initializating scheme..." <<std::endl; - if(!this->schemeHost.init(parameters)) - { - cerr << "SchemeHost failed to initialize." <<std::endl; - return false; - } - cout << "Scheme initialized." <<std::endl; - - test(); - - VectorType* tmp = new VectorType[subgridValues.getSize()]; - - -#ifdef HAVE_CUDA - - if(this->device == tnlCudaDevice) - { - /*cout << "Testing... " <<std::endl; - if(this->device == tnlCudaDevice) - { - if( !initCUDA3D(parameters, gridRows, gridCols) ) - return false; - }*/ - //cout << "s" <<std::endl; - cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >)); - //cout << "s" <<std::endl; - cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); - //cout << "s" <<std::endl; - double** tmpdev = NULL; - cudaMalloc(&tmpdev, sizeof(double*)); - //double* tmpw; - cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double)); - cudaMalloc(&(this->runcuda), sizeof(int)); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - int* tmpUC; - cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); - cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); - - initCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s " <<std::endl; - //cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double)); - double* tmpu = NULL; - - cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); - //printf("%p %p \n",tmpu,tmpw); - cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "s "<<std::endl; - - } -#endif - - if(this->device == TNL::Devices::HostDevice) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - bool containsCurve = false; -// cout << "Working on subgrid " << i <<" --- check 1" <<std::endl; - - if(! tmp[i].setSize(this->n*this->n*this->n)) - cout << "Could not allocate tmp["<< i <<"] array." <<std::endl; -// cout << "Working on subgrid " << i <<" --- check 2" <<std::endl; - - tmp[i] = getSubgrid(i); - containsCurve = false; -// cout << "Working on subgrid " << i <<" --- check 3" <<std::endl; - - - for(int j = 0; j < tmp[i].getSize(); j++) - { - if(tmp[i][0]*tmp[i][j] <= 0.0) - { - containsCurve = true; - j=tmp[i].getSize(); -// cout << tmp[i][0] << " " << tmp[i][j] <<std::endl; - } - - } -// cout << "Working on subgrid " << i <<" --- check 4" <<std::endl; - - if(containsCurve) - { -// cout << "Computing initial SDF on subgrid " << i << "." <<std::endl; - tmp[i] = runSubgrid(0, tmp[i] ,i); - insertSubgrid( tmp[i], i); - setSubgridValue(i, 4); -// cout << "Computed initial SDF on subgrid " << i << "." <<std::endl; - } - containsCurve = false; - - } -// cout << "CPU: Curve found" <<std::endl; - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { -// cout << "pre 1 kernel" <<std::endl; - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - dim3 threadsPerBlock(this->n, this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initRunCUDA3D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - cudaDeviceSynchronize(); -// cout << "post 1 kernel" <<std::endl; - - } -#endif - - - this->currentStep = 1; - if(this->device == TNL::Devices::HostDevice) - synchronize(); -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - dim3 threadsPerBlock(this->n, this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); - //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); - //cout << test[0] <<" " << test[1] <<" " << test[2] <<" " << test[3] <<std::endl; - //cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] <<" " << test[1] << " " <<test[2] << " " <<test[3] <<std::endl; - - TNL_CHECK_CUDA_DEVICE; - - synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cout << cudaGetErrorString(cudaDeviceSynchronize()) <<std::endl; - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << test[0] << " " <<test[1] <<" " << test[2] << " " <<test[3] <<std::endl; - //cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //TNL_CHECK_CUDA_DEVICE; - //cout << this->tmpw << " " << test[0] << " " <<test[1] << " " <<test[2] <<" " << test[3] <<std::endl; - //free(test); - - } - -#endif - cout << "Solver initialized." <<std::endl; - - return true; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::run() -{ - if(this->device == TNL::Devices::HostDevice) - { - - bool end = false; - while (/*(this->boundaryConditions.max() > 0 ) ||*/ !end) - { - if(this->boundaryConditions.max() == 0 || this->subgridValues.max() < 0) - end=true; - else - end=false; -#ifdef HAVE_OPENMP -#pragma omp parallel for num_threads(4) schedule(dynamic) -#endif - for(int i = 0; i < this->subgridValues.getSize(); i++) - { - VectorType tmp; - tmp.setSize(this->n*this->n*this->n); - if(getSubgridValue(i) != INT_MAX) - { - //cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) <<std::endl; - - if(getSubgridValue(i) == currentStep+4) - { - - if(getBoundaryCondition(i) & 1) - { - tmp = getSubgrid(i); - tmp = runSubgrid(1, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 2) - { - tmp = getSubgrid(i); - tmp = runSubgrid(2, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 4) - { - tmp = getSubgrid(i); - tmp = runSubgrid(4, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 8) - { - tmp = getSubgrid(i); - tmp = runSubgrid(8, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 16) - { - tmp = getSubgrid(i); - tmp = runSubgrid(16, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - if(getBoundaryCondition(i) & 32) - { - tmp = getSubgrid(i); - tmp = runSubgrid(32, tmp ,i); - insertSubgrid( tmp, i); - this->calculationsCount[i]++; - } - } - - if( getBoundaryCondition(i) & 19) - { - tmp = getSubgrid(i); - tmp = runSubgrid(19, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 21) - { - tmp = getSubgrid(i); - tmp = runSubgrid(21, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 26) - { - tmp = getSubgrid(i); - tmp = runSubgrid(26, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 28) - { - tmp = getSubgrid(i); - tmp = runSubgrid(28, tmp ,i); - insertSubgrid( tmp, i); - } - - if( getBoundaryCondition(i) & 35) - { - tmp = getSubgrid(i); - tmp = runSubgrid(35, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 37) - { - tmp = getSubgrid(i); - tmp = runSubgrid(37, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 42) - { - tmp = getSubgrid(i); - tmp = runSubgrid(42, tmp ,i); - insertSubgrid( tmp, i); - } - if( getBoundaryCondition(i) & 44) - { - tmp = getSubgrid(i); - tmp = runSubgrid(44, tmp ,i); - insertSubgrid( tmp, i); - } - - - setBoundaryCondition(i, 0); - setSubgridValue(i, getSubgridValue(i)-1); - - } - } - synchronize(); - } - } -#ifdef HAVE_CUDA - else if(this->device == tnlCudaDevice) - { - //cout << "fn" <<std::endl; - bool end_cuda = false; - dim3 threadsPerBlock(this->n, this->n, this->n); - dim3 numBlocks(this->gridCols,this->gridRows,this->gridLevels); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cudaMalloc(&runcuda,sizeof(bool)); - //cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice); - //cout << "fn" <<std::endl; - bool* tmpb; - //cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - //cudaDeviceSynchronize(); - //TNL_CHECK_CUDA_DEVICE; - cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //cout << "fn" <<std::endl; - int i = 1; - time_diff = 0.0; - while (run_host || !end_cuda) - { - cout << "Computing at step "<< i++ <<std::endl; - if(run_host != 0 ) - end_cuda = true; - else - end_cuda = false; - //cout << "a" <<std::endl; - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - start = std::clock(); - runCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,2*this->n*this->n*this->n*sizeof(double)>>>(this->cudaSolver); - //cout << "a" <<std::endl; - cudaDeviceSynchronize(); - time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - //start = std::clock(); - synchronizeCUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - synchronize2CUDA3D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); - - - //cout << "a" <<std::endl; - //run_host = false; - //cout << "in kernel loop" << run_host <<std::endl; - //cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); - cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); - //cout << "in kernel loop" << run_host <<std::endl; - } - cout << "Solving time was: " << time_diff <<std::endl; - //cout << "b" <<std::endl; - - //double* tmpu; - //cudaMemcpy(tmpu, &(cudaSolver->work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice); - //cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->work_u.getData()[0] <<std::endl; - - //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); - //cout << test[0] << test[1] << test[2] << test[3] <<std::endl; - cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); - //cout << this->tmpw << " " << test[0] << test[1] << test[2] << test[3] <<std::endl; - //free(test); - - cudaDeviceSynchronize(); - } -#endif - contractGrid(); - this->u0.save("u-00001.tnl"); - cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() <<std::endl; - cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) <<std::endl; - cout << "Solver finished" <<std::endl; - -#ifdef HAVE_CUDA - if(this->device == tnlCudaDevice) - { - cudaFree(this->runcuda); - cudaFree(this->tmpw); - cudaFree(this->cudaSolver); - } -#endif - -} - -//north - 1, east - 2, west - 4, south - 8 -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - cout << "Synchronizig..." <<std::endl; - int tmp1, tmp2; - int grid1, grid2; - -// if(this->currentStep & 1) -// { - for(int j = 0; j < this->gridRows - 1; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - for (int k = 0; k < this->gridLevels*this->n; k++) - { -// cout << "a" <<std::endl; - tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i + k*this->gridCols*this->n*this->gridRows*this->n; -// cout << "b" <<std::endl; - tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i + k*this->gridCols*this->n*this->gridRows*this->n; -// cout << "c" <<std::endl; - if(tmp1 > work_u.getSize()) - cout << "tmp1: " << tmp1 << " x: " << j <<" y: " << i <<" z: " << k <<std::endl; - if(tmp2 > work_u.getSize()) - cout << "tmp2: " << tmp2 << " x: " << j <<" y: " << i <<" z: " << k <<std::endl; - grid1 = getSubgridValue(getOwner(tmp1)); -// cout << "d" <<std::endl; - grid2 = getSubgridValue(getOwner(tmp2)); -// cout << "e" <<std::endl; - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j, k" << i << "," << j << "," << k <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; -// cout << "f" <<std::endl; - this->unusedCell[tmp2] = 0; -// cout << "g" <<std::endl; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } -// cout << "h" <<std::endl; - if(! (getBoundaryCondition(getOwner(tmp2)) & 8) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8); -// cout << "i" <<std::endl; - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; -// cout << "j" <<std::endl; - this->unusedCell[tmp1] = 0; -// cout << "k" <<std::endl; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } -// cout << "l" <<std::endl; - if(! (getBoundaryCondition(getOwner(tmp1)) & 1) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1); -// cout << "m" <<std::endl; - } - } - } - } - -// } -// else -// { - - cout << "sync 2" <<std::endl; - for(int i = 1; i < this->gridCols; i++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - for (int k = 0; k < this->gridLevels*this->n; k++) - { - tmp1 = this->gridCols*this->n*j + i*this->n - 1 + k*this->gridCols*this->n*this->gridRows*this->n; - tmp2 = this->gridCols*this->n*j + i*this->n + k*this->gridCols*this->n*this->gridRows*this->n; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j, k" << i << "," << j << "," << k <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 2) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); - } - } - } - } - - cout << "sync 3" <<std::endl; - - for(int k = 1; k < this->gridLevels; k++) - { - for (int j = 0; j < this->gridRows*this->n; j++) - { - for (int i = 0; i < this->gridCols*this->n; i++) - { - tmp1 = this->gridCols*this->n*j + i + (k*this->n-1)*this->gridCols*this->n*this->gridRows*this->n; - tmp2 = this->gridCols*this->n*j + i + k*this->n*this->gridCols*this->n*this->gridRows*this->n; - grid1 = getSubgridValue(getOwner(tmp1)); - grid2 = getSubgridValue(getOwner(tmp2)); - if(getOwner(tmp1)==getOwner(tmp2)) - cout << "i, j, k" << i << "," << j << "," << k <<std::endl; - if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) - { - this->work_u[tmp2] = this->work_u[tmp1]; - this->unusedCell[tmp2] = 0; - if(grid2 == INT_MAX) - { - setSubgridValue(getOwner(tmp2), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp2)) & 32) ) - setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+32); - } - else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) - { - this->work_u[tmp1] = this->work_u[tmp2]; - this->unusedCell[tmp1] = 0; - if(grid1 == INT_MAX) - { - setSubgridValue(getOwner(tmp1), -INT_MAX); - } - if(! (getBoundaryCondition(getOwner(tmp1)) & 16) ) - setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+16); - } - } - } - } -// } - - - - this->currentStep++; - int stepValue = this->currentStep + 4; - for (int i = 0; i < this->subgridValues.getSize(); i++) - { - if( getSubgridValue(i) == -INT_MAX ) - setSubgridValue(i, stepValue); - } - - cout << "Grid synchronized at step " << (this->currentStep - 1 ) <<std::endl; - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const -{ - - int j = i % (this->gridCols*this->gridRows*this->n*this->n); - - return ( (i / (this->gridCols*this->gridRows*this->n*this->n*this->n))*this->gridCols*this->gridRows - + (j / (this->gridCols*this->n*this->n))*this->gridCols - + (j % (this->gridCols*this->n))/this->n); -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const -{ - return this->subgridValues[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) -{ - this->subgridValues[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const -{ - return this->boundaryConditions[i]; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) -{ - this->boundaryConditions[i] = value; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() -{ - cout << "Stretching grid..." <<std::endl; - - - this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); - this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); - this->gridLevels = ceil( ((double)(this->mesh.getDimensions().z()-1)) / ((double)(this->n-1)) ); - - //this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ; - //this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ; - - cout << "Setting gridCols to " << this->gridCols << "." <<std::endl; - cout << "Setting gridRows to " << this->gridRows << "." <<std::endl; - cout << "Setting gridLevels to " << this->gridLevels << "." <<std::endl; - - this->subgridValues.setSize(this->gridCols*this->gridRows*this->gridLevels); - this->subgridValues.setValue(0); - this->boundaryConditions.setSize(this->gridCols*this->gridRows*this->gridLevels); - this->boundaryConditions.setValue(0); - this->calculationsCount.setSize(this->gridCols*this->gridRows*this->gridLevels); - this->calculationsCount.setValue(0); - - for(int i = 0; i < this->subgridValues.getSize(); i++ ) - { - this->subgridValues[i] = INT_MAX; - this->boundaryConditions[i] = 0; - } - - int levelSize = this->n*this->n*this->gridCols*this->gridRows; - int stretchedSize = this->n*levelSize*this->gridLevels; - - if(!this->work_u.setSize(stretchedSize)) - cerr << "Could not allocate memory for stretched grid." <<std::endl; - if(!this->unusedCell.setSize(stretchedSize)) - cerr << "Could not allocate memory for supporting stretched grid." <<std::endl; - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <<std::endl; - - - - - for(int i = 0; i < levelSize; i++) - { - int diff =(this->n*this->gridCols) - idealStretch ; - - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if(i%(this->n*this->gridCols) - idealStretch >= 0) - { - k+= i%(this->n*this->gridCols) - idealStretch +1 ; - } - - if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) - { - k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; - } - - for( int j = 0; j<this->n*this->gridLevels; j++) - { - this->unusedCell[i+j*levelSize] = 1; - int l = j/this->n; - - if(j - idealStretch >= 0) - { - l+= j - idealStretch + 1; - } - - this->work_u[i+j*levelSize] = this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k]; - } - - } - - - - cout << "Grid stretched." <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() -{ - cout << "Contracting grid..." <<std::endl; - int levelSize = this->n*this->n*this->gridCols*this->gridRows; - int stretchedSize = this->n*levelSize*this->gridLevels; - - int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); - cout << idealStretch <<std::endl; - - - for(int i = 0; i < levelSize; i++) - { - int diff =(this->n*this->gridCols) - idealStretch ; - int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; - - if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0) ) - { - for( int j = 0; j<this->n*this->gridLevels; j++) - { - int l = j/this->n; - if(j - idealStretch < 0) - this->u0[i+(j-l)*mesh.getDimensions().x()*mesh.getDimensions().y()-k] = this->work_u[i+j*levelSize]; - } - } - - } - - cout << "Grid contracted" <<std::endl; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const -{ - - VectorType u; - u.setSize(this->n*this->n*this->n); - - int idx, idy, idz; - idz = i / (gridRows*this->gridCols); - idy = (i % (this->gridRows*this->gridCols)) / this->gridCols; - idx = i % (this->gridCols); - - for( int j = 0; j < this->n; j++) - { - // int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); - for( int k = 0; k < this->n; k++) - { - for( int l = 0; l < this->n; l++) - { - int index = (idz*this->n + l) * this->n*this->n*this->gridCols*this->gridRows - + (idy) * this->n*this->n*this->gridCols - + (idx) * this->n - + k * this->n*this->gridCols - + j; - - u[j + k*this->n + l*this->n*this->n] = this->work_u[ index ]; - } - } - } - return u; -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) -{ - int idx, idy, idz; - idz = i / (this->gridRows*this->gridCols); - idy = (i % (this->gridRows*this->gridCols)) / this->gridCols; - idx = i % (this->gridCols); - - for( int j = 0; j < this->n; j++) - { - // int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); - for( int k = 0; k < this->n; k++) - { - for( int l = 0; l < this->n; l++) - { - - int index = (idz*this->n + l) * this->n*this->n*this->gridCols*this->gridRows - + (idy) * this->n*this->n*this->gridCols - + (idx) * this->n - + k * this->n*this->gridCols - + j; - - //OMP LOCK index -// cout<< idx << " " << idy << " " << idz << " " << j << " " << k << " " << l << " " << idz << " " << unusedCell.getSize() << " " << u.getSize() << " " << index <<endl; - if( (fabs(this->work_u[index]) > fabs(u[j + k*this->n + l*this->n*this->n])) || (this->unusedCell[index] == 1) ) - { - this->work_u[index] = u[j + k*this->n + l*this->n*this->n]; - this->unusedCell[index] = 0; - } - //OMP UNLOCK index - } - } - } -} - -template< typename SchemeHost, typename SchemeDevice, typename Device> -typename tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::VectorType -tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID) -{ - - VectorType fu; - - fu.setLike(u); - fu.setValue( 0.0 ); - - - bool tmp = false; - for(int i = 0; i < u.getSize(); i++) - { - if(u[0]*u[i] <= 0.0) - tmp=true; - } - int idx,idy,idz; - idz = subGridID / (this->gridRows*this->gridCols); - idy = (subGridID % (this->gridRows*this->gridCols)) / this->gridCols; - idx = subGridID % (this->gridCols); - int centerGID = (this->n*idy + (this->n>>1) )*(this->n*this->gridCols) + this->n*idx + (this->n>>1) - + ((this->n>>1)+this->n*idz)*this->n*this->n*this->gridRows*this->gridCols; - if(this->unusedCell[centerGID] == 0 || boundaryCondition == 0) - tmp = true; - //if(this->currentStep + 3 < getSubgridValue(subGridID)) - //tmp = true; - - - double value = sign(u[0]) * u.absMax(); - - if(tmp) - {} - - - //north - 1, east - 2, west - 4, south - 8 - else if(boundaryCondition == 4) - { - for(int i = 0; i < this->n; i++) - for(int j = 1;j < this->n; j++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) - u[k*this->n*this->n + i*this->n + j] = value;// u[i*this->n]; - } - else if(boundaryCondition == 2) - { - for(int i = 0; i < this->n; i++) - for(int j =0 ;j < this->n -1; j++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) - u[k*this->n*this->n + i*this->n + j] = value;// u[(i+1)*this->n - 1]; - } - else if(boundaryCondition == 1) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n - 1; i++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 8) - { - for(int j = 0; j < this->n; j++) - for(int i = 1;i < this->n; i++) - for(int k = 0;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j]; - } - else if(boundaryCondition == 16) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n ; i++) - for(int k = 0;k < this->n-1; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; - } - else if(boundaryCondition == 32) - { - for(int j = 0; j < this->n; j++) - for(int i = 0;i < this->n; i++) - for(int k = 1;k < this->n; k++) - //if(fabs(u[i*this->n + j]) < fabs(u[j])) - u[k*this->n*this->n + i*this->n + j] = value;// u[j]; - } - - - double time = 0.0; - double currentTau = this->tau0; - double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); - if(boundaryCondition == 0) finalTime *= 2.0; - if( time + currentTau > finalTime ) currentTau = finalTime - time; - - double maxResidue( 1.0 ); - //double lastResidue( 10000.0 ); - tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity); - while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*/) - { - /**** - * Compute the RHS - */ - - for( int i = 0; i < fu.getSize(); i ++ ) - { -// std::cout << "i: " << i << ", time: " << time <<endl; - Containers::StaticVector<3,int> coords(i % subMesh.getDimensions().x(), - (i % (subMesh.getDimensions().x()*subMesh.getDimensions().y())) / subMesh.getDimensions().x(), - i / (subMesh.getDimensions().x()*subMesh.getDimensions().y())); -// cout << "b " << i << " " << i % subMesh.getDimensions().x() << " " << (i % (subMesh.getDimensions().x()*subMesh.getDimensions().y())) << " " << (i % subMesh.getDimensions().x()*subMesh.getDimensions().y()) / subMesh.getDimensions().x() << " " << subMesh.getDimensions().x()*subMesh.getDimensions().y() << " " <<endl; - Entity.setCoordinates(coords); -// cout <<"c" << coords <<std::endl; - Entity.refresh(); -// cout << "d" <<endl; - neighborEntities.refresh(subMesh,Entity.getIndex()); -// cout << "e" <<endl; - fu[ i ] = schemeHost.getValue( this->subMesh, i, coords,u, time, boundaryCondition, neighborEntities ); -// std::cout << "f" <<endl; - } - maxResidue = fu. absMax(); - - - if( this -> cflCondition * maxResidue != 0.0) - currentTau = this -> cflCondition / maxResidue; - - /* if (maxResidue < 0.05) - std::cout << "Max < 0.05" <<std::endl;*/ - if(currentTau > 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()) - currentTau = 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - /*if(maxResidue > lastResidue) - currentTau *=(1.0/10.0);*/ - - - if( time + currentTau > finalTime ) currentTau = finalTime - time; -// for( int i = 0; i < fu.getSize(); i ++ ) -// { -// //cout << "Too big RHS! i = " << i << ", fu = " << fu[i] << ", u = " << u[i] <<std::endl; -// if((u[i]+currentTau * fu[ i ])*u[i] < 0.0 && fu[i] != 0.0 && u[i] != 0.0 ) -// currentTau = fabs(u[i]/(2.0*fu[i])); -// -// } - - - for( int i = 0; i < fu.getSize(); i ++ ) - { - double add = u[i] + currentTau * fu[ i ]; - //if( fabs(u[i]) < fabs(add) or (this->subgridValues[subGridID] == this->currentStep +4) ) - u[ i ] = add; - } - time += currentTau; - - //cout << '\r' << flush; - //cout << maxResidue << " " << currentTau << " @ " << time << flush; - //lastResidue = maxResidue; - } - //cout << "Time: " << time << ", Res: " << maxResidue <<endl; - /*if (u.max() > 0.0) - this->stopTime /=(double) this->gridCols;*/ - -// VectorType solution; -// solution.setLike(u); -// for( int i = 0; i < u.getSize(); i ++ ) -// { -// solution[i]=u[i]; -// } -// return solution; - return u; -} - - -#ifdef HAVE_CUDA - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ - //int j = threadIdx.x + threadIdx.y * blockDim.x; -// int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows -// + (blockIdx.y) * this->n*this->n*this->gridCols -// + (blockIdx.x) * this->n -// + threadIdx.y * this->n*this->gridCols -// + threadIdx.x; - - - int index = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - - //printf("i= %d,j= %d,th= %d\n",i,j,th); - *a = caller->work_u_cuda[index]; - //printf("Hi %f \n", *a); - //return ret; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA3D( const int i ,tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) -{ -// int j = threadIdx.x + threadIdx.y * blockDim.x; -// int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows -// + (blockIdx.y) * this->n*this->n*this->gridCols -// + (blockIdx.x) * this->n -// + threadIdx.y * this->n*this->gridCols -// + threadIdx.x; - - int index = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - - if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) - { - caller->work_u_cuda[index] = *a; - caller->unusedCell_cuda[index] = 0; - - } - - *a = caller->work_u_cuda[index]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA3D( double u, const int i ) -{ - - -// int j = threadIdx.x + threadIdx.y * blockDim.x; - //printf("j = %d, u = %f\n", j,u); - -// int index = (blockIdx.z*this->n + threadIdx.z) * this->n*this->n*this->gridCols*this->gridRows -// + (blockIdx.y) * this->n*this->n*this->gridCols -// + (blockIdx.x) * this->n -// + threadIdx.y * this->n*this->gridCols -// + threadIdx.x; - - int index = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - - //printf("i= %d,j= %d,index= %d\n",i,j,index); - if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) - { - this->work_u_cuda[index] = u; - this->unusedCell_cuda[index] = 0; - - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID) -{ - - __shared__ int tmp; - __shared__ double value; - //double tmpRes = 0.0; - volatile double* sharedTau = &u[blockDim.x*blockDim.y*blockDim.z]; -// volatile double* absVal = &u[2*blockDim.x*blockDim.y*blockDim.z]; - int i = threadIdx.x; - int j = threadIdx.y; - int k = threadIdx.z; - int l = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z*blockDim.x*blockDim.y; - bool computeFU = !((i == 0 && (boundaryCondition & 4)) or - (i == blockDim.x - 1 && (boundaryCondition & 2)) or - (j == 0 && (boundaryCondition & 8)) or - (j == blockDim.y - 1 && (boundaryCondition & 1))or - (k == 0 && (boundaryCondition & 32)) or - (k == blockDim.z - 1 && (boundaryCondition & 16))); - - if(l == 0) - { - tmp = 0; - int centerGID = (blockDim.y*blockIdx.y + (blockDim.y>>1) )*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1) - + ((blockDim.z>>1)+blockDim.z*blockIdx.z)*blockDim.x*blockDim.y*gridDim.x*gridDim.y; - if(this->unusedCell_cuda[centerGID] == 0 || boundaryCondition == 0) - tmp = 1; - } - __syncthreads(); - - - __syncthreads(); - if(tmp !=1) - { -// if(computeFU) -// absVal[l]=0.0; -// else -// absVal[l] = fabs(u[l]); -// -// __syncthreads(); -// -// if((blockDim.x == 16) && (l < 128)) absVal[l] = Max(absVal[l],absVal[l+128]); -// __syncthreads(); -// if((blockDim.x == 16) && (l < 64)) absVal[l] = Max(absVal[l],absVal[l+64]); -// __syncthreads(); -// if(l < 32) absVal[l] = Max(absVal[l],absVal[l+32]); -// if(l < 16) absVal[l] = Max(absVal[l],absVal[l+16]); -// if(l < 8) absVal[l] = Max(absVal[l],absVal[l+8]); -// if(l < 4) absVal[l] = Max(absVal[l],absVal[l+4]); -// if(l < 2) absVal[l] = Max(absVal[l],absVal[l+2]); -// if(l < 1) value = sign(u[0])*Max(absVal[l],absVal[l+1]); -// __syncthreads(); -// -// if(computeFU) -// u[l] = value; - if(computeFU) - { - tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Ent(subMesh); - if(boundaryCondition == 4) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(0,j,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.x) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.x+this->n); - } - else if(boundaryCondition == 2) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(blockDim.x - 1,j,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); - } - else if(boundaryCondition == 8) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,0,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(threadIdx.y) ;//+ 2*sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(threadIdx.y+this->n); - } - else if(boundaryCondition == 1) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,blockDim.y - 1,k)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); - } - else if(boundaryCondition == 32) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,j,0)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(threadIdx.z); - } - else if(boundaryCondition == 16) - { - Ent.setCoordinates(Containers::StaticVector<3,int>(i,j,blockDim.z - 1)); - Ent.refresh(); - u[l] = u[Ent.getIndex()];// + sign(u[0])*this->subMesh.template getSpaceStepsProducts< 0, 0, 1 >()*(this->n - 1 - threadIdx.z) ; - } - } - } - - double time = 0.0; - __shared__ double currentTau; - double cfl = this->cflCondition; - double fu = 0.0; -// if(threadIdx.x * threadIdx.y * threadIdx.z == 0) -// { -// currentTau = this->tau0; -// } - double finalTime = this->stopTime; - __syncthreads(); - if( boundaryCondition == 0 ) finalTime *= 2.0; - - tnlGridEntity<MeshType, 3, tnlGridEntityNoStencilStorage > Entity(subMesh); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity); - Entity.setCoordinates(Containers::StaticVector<3,int>(i,j,k)); - Entity.refresh(); - neighborEntities.refresh(subMesh,Entity.getIndex()); - - - while( time < finalTime ) - { - sharedTau[l]=finalTime; - - if(computeFU) - { - fu = schemeHost.getValueDev( this->subMesh, l, Containers::StaticVector<3,int>(i,j,k), u, time, boundaryCondition, neighborEntities); - if(abs(fu) > 0.0) - sharedTau[l]=abs(cfl/fu); - } - - if(l == 0) - { - if(sharedTau[0] > 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >()) sharedTau[0] = 0.5 * this->subMesh.template getSpaceStepsProducts< 1, 0, 0 >(); - } - else if(l == blockDim.x*blockDim.y*blockDim.z - 1) - { - if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; - } - - __syncthreads(); - if(l < 256) sharedTau[l] = Min(sharedTau[l],sharedTau[l+256]); - __syncthreads(); - if(l < 128) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); - __syncthreads(); - if(l < 64) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); - __syncthreads(); - if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); - __syncthreads(); - if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); - if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); - if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); - if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); - if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); - __syncthreads(); - -// if(abs(fu) < 10000.0) -// printf("bla"); - if(computeFU) - u[l] += currentTau * fu; - time += currentTau; - __syncthreads(); - } - - -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA3D(int i) const -{ - int j = i % (this->gridCols*this->gridRows*this->n*this->n); - - return ( (i / (this->gridCols*this->gridRows*this->n*this->n))*this->gridCols*this->gridRows - + (j / (this->gridCols*this->n*this->n))*this->gridCols - + (j % (this->gridCols*this->n))/this->n); -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA3D( int i ) const -{ - return this->subgridValues_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA3D(int i, int value) -{ - this->subgridValues_cuda[i] = value; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -int tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA3D( int i ) const -{ - return this->boundaryConditions_cuda[i]; -} - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__device__ -void tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA3D(int i, int value) -{ - this->boundaryConditions_cuda[i] = value; -} - - - -//north - 1, east - 2, west - 4, south - 8, up -16, down - 32 - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void /*tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now -{ - - __shared__ int boundary[6]; // north,east,west,south - __shared__ int subgridValue; - __shared__ int newSubgridValue; - - - int gid = blockDim.x*blockIdx.x + threadIdx.x + - (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + - (blockDim.z*blockIdx.z + threadIdx.z)*blockDim.x*gridDim.x*blockDim.y*gridDim.y; - double u = cudaSolver->work_u_cuda[gid]; - double u_cmp; - int subgridValue_cmp=INT_MAX; - int boundary_index=0; - - - if(threadIdx.x+threadIdx.y+threadIdx.z == 0) - { - subgridValue = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y); - boundary[0] = 0; - boundary[1] = 0; - boundary[2] = 0; - boundary[3] = 0; - boundary[4] = 0; - boundary[5] = 0; - newSubgridValue = 0; -// printf("aaa z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); - } - __syncthreads(); - - - - if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.z == 0 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || - (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) || - (threadIdx.z == blockDim.z - 1 /* && (cudaSolver->currentStep & 1)*/) ) - { - if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y - 1); - boundary_index = 2; - } - - if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + 1]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y + 1); - boundary_index = 1; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y - 1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y); - boundary_index = 3; - } - if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D((blockIdx.y + 1)*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y); - boundary_index = 0; - } - - __threadfence(); - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - u=u_cmp; - } - __threadfence(); - - if(threadIdx.z == 0 && (blockIdx.z != 0)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x*blockDim.y*gridDim.y]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z - 1)*gridDim.x*gridDim.y); - boundary_index = 5; - } - if(threadIdx.z == blockDim.z - 1 && (blockIdx.z != gridDim.z - 1)/* && (cudaSolver->currentStep & 1)*/) - { - u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x*blockDim.y*gridDim.y]; - subgridValue_cmp = cudaSolver->getSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + (blockIdx.z + 1)*gridDim.x*gridDim.y); - boundary_index = 4; - } - __threadfence(); - - if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) - { - cudaSolver->unusedCell_cuda[gid] = 0; - atomicMax(&newSubgridValue, INT_MAX); - atomicMax(&boundary[boundary_index], 1); - cudaSolver->work_u_cuda[gid] = u_cmp; - } - __threadfence(); - - } - __syncthreads(); - - if(threadIdx.x+threadIdx.y+threadIdx.z == 0) - { - - if(subgridValue == INT_MAX && newSubgridValue != 0) - cudaSolver->setSubgridValueCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y, -INT_MAX); - - cudaSolver->setBoundaryConditionCUDA3D(blockIdx.y*gridDim.x + blockIdx.x + blockIdx.z*gridDim.x*gridDim.y, 1 * boundary[0] + - 2 * boundary[1] + - 4 * boundary[2] + - 8 * boundary[3] + - 16 * boundary[4] + - 32 * boundary[5] ); - if(blockIdx.x+blockIdx.y+blockIdx.z == 0) - { - cudaSolver->currentStep = cudaSolver->currentStep + 1; - *(cudaSolver->runcuda) = 0; - } - } -} - - - -template <typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void synchronize2CUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) -{ - int stepValue = cudaSolver->currentStep + 4; - if( cudaSolver->getSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) - cudaSolver->setSubgridValueCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x, stepValue); - - atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA3D(blockIdx.z*gridDim.x*gridDim.y + blockIdx.y*gridDim.x + blockIdx.x)); -} - - - - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device> -__global__ -void initCUDA3D( tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3) -{ - - - cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double)); - cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int)); - cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels*sizeof(int)); - cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels*sizeof(int)); - cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool)); - *(cudaSolver->runcuda) = 1; - cudaSolver->currentStep = 1; - //cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost); - //ptr = cudaSolver->work_u_cuda; - printf("GPU memory allocated.\n"); - - for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->gridLevels; i++) - { - cudaSolver->subgridValues_cuda[i] = INT_MAX; - cudaSolver->boundaryConditions_cuda[i] = 0; - } - - /*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++) - { - printf("%d\n",j); - cudaSolver->unusedCell_cuda[ j] = 1; - }*/ - printf("GPU memory initialized.\n"); -} - - - - -//extern __shared__ double array[]; -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void initRunCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller) - -{ - - - extern __shared__ double u[]; - - int i = blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - - __shared__ int containsCurve; - if(l == 0) - { -// printf("z = %d, y = %d, x = %d\n",blockIdx.z,blockIdx.y,blockIdx.x); - containsCurve = 0; - } - - caller->getSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - if(u[0] * u[l] <= 0.0) - { - atomicMax( &containsCurve, 1); - } - - __syncthreads(); - if(containsCurve == 1) - { - caller->runSubgridCUDA3D(0,u,i); - __syncthreads(); -// caller->insertSubgridCUDA3D(u[l],i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - - __syncthreads(); - if(l == 0) - caller->setSubgridValueCUDA3D(i, 4); - } - - -} - - - - - -template< typename SchemeHost, typename SchemeDevice, typename Device > -__global__ -void runCUDA3D(tnlParallelEikonalSolver<3,SchemeHost, SchemeDevice, Device, double, int >* caller) -{ - extern __shared__ double u[]; - int i = blockIdx.z * gridDim.x * gridDim.y + blockIdx.y * gridDim.x + blockIdx.x; - int l = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; - int bound = caller->getBoundaryConditionCUDA3D(i); - - if(caller->getSubgridValueCUDA3D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA3D(i) > 0) - { - caller->getSubgridCUDA3D(i,caller, &u[l]); - - //if(l == 0) - //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA3D(i)); - if(caller->getSubgridValueCUDA3D(i) == caller->currentStep+4) - { - if(bound & 1) - { - caller->runSubgridCUDA3D(1,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 2 ) - { - caller->runSubgridCUDA3D(2,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 4) - { - caller->runSubgridCUDA3D(4,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 8) - { - caller->runSubgridCUDA3D(8,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 16) - { - caller->runSubgridCUDA3D(16,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound & 32) - { - caller->runSubgridCUDA3D(32,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - - } - else - { - if( ((bound == 2))) - { - caller->runSubgridCUDA3D(2,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 1) )) - { - caller->runSubgridCUDA3D(1,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound == 8) )) - { - caller->runSubgridCUDA3D(8,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if((bound == 4)) - { - caller->runSubgridCUDA3D(4,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 16) - { - caller->runSubgridCUDA3D(16,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if(bound == 32) - { - caller->runSubgridCUDA3D(32,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - } - /* 1 2 4 8 16 32 */ - - if( ((bound & 19 ))) /* 1 1 0 0 1 0 */ - { - caller->runSubgridCUDA3D(19,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 21 ))) /* 1 0 1 0 1 0 */ - { - caller->runSubgridCUDA3D(21,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 26 ))) /* 0 1 0 1 1 0 */ - { - caller->runSubgridCUDA3D(26,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 28 )) /* 0 0 1 1 1 0 */ - { - caller->runSubgridCUDA3D(28,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - - - - if( ((bound & 35 ))) /* 1 0 1 0 0 1 */ - { - caller->runSubgridCUDA3D(35,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 37 ))) /* 1 0 1 0 0 1 */ - { - caller->runSubgridCUDA3D(37,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( ((bound & 42 ))) /* 0 1 0 1 0 1 */ - { - caller->runSubgridCUDA3D(42,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - if( (bound & 44 )) /* 0 0 1 1 0 1 */ - { - caller->runSubgridCUDA3D(44,u,i); - caller->updateSubgridCUDA3D(i,caller, &u[l]); - __syncthreads(); - } - - if(l==0) - { - caller->setBoundaryConditionCUDA3D(i, 0); - caller->setSubgridValueCUDA3D(i, caller->getSubgridValueCUDA3D(i) - 1 ); - } - - - } - - - -} - -#endif /*HAVE_CUDA*/ - -#endif /* TNLPARALLELEIKONALSOLVER3D_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/CMakeLists.txt b/src/TNL/Legacy/narrow-band/CMakeLists.txt deleted file mode 100644 index 158cd20132..0000000000 --- a/src/TNL/Legacy/narrow-band/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -set( tnl_narrow_band_SOURCES -# MainBuildConfig.h -# tnlNarrowBand2D_impl.h -# tnlNarrowBand.h -# narrowBandConfig.h - main.cpp) - - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(narrow-band main.cu) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(narrow-band main.cpp) -ENDIF( BUILD_CUDA ) -target_link_libraries (narrow-band tnl ) - - -INSTALL( TARGETS narrow-band - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES ${tnl_narrow_band_SOURCES} -# DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/examples/narrow-band ) diff --git a/src/TNL/Legacy/narrow-band/MainBuildConfig.h b/src/TNL/Legacy/narrow-band/MainBuildConfig.h deleted file mode 100644 index ed3d686eb9..0000000000 --- a/src/TNL/Legacy/narrow-band/MainBuildConfig.h +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** - MainBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef MAINBUILDCONFIG_H_ -#define MAINBUILDCONFIG_H_ - -#include <solvers/tnlBuildConfigTags.h> - -class MainBuildConfig -{ - public: - - static void print() {std::cerr << "MainBuildConfig" <<std::endl; } -}; - -/**** - * Turn off support for float and long double. - */ -template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< MainBuildConfig, Real >::enabled && - tnlConfigTagDevice< MainBuildConfig, Device >::enabled && - tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. - */ -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; -template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* MAINBUILDCONFIG_H_ */ diff --git a/src/TNL/Legacy/narrow-band/main.cpp b/src/TNL/Legacy/narrow-band/main.cpp deleted file mode 100644 index 8849008ff6..0000000000 --- a/src/TNL/Legacy/narrow-band/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/narrow-band/main.cu b/src/TNL/Legacy/narrow-band/main.cu deleted file mode 100644 index 8849008ff6..0000000000 --- a/src/TNL/Legacy/narrow-band/main.cu +++ /dev/null @@ -1,17 +0,0 @@ -/*************************************************************************** - main.cpp - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "main.h" diff --git a/src/TNL/Legacy/narrow-band/main.h b/src/TNL/Legacy/narrow-band/main.h deleted file mode 100644 index 51dbdac37c..0000000000 --- a/src/TNL/Legacy/narrow-band/main.h +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** - main.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - - -#include "MainBuildConfig.h" - //for HOST versions: -//#include "tnlNarrowBand.h" - //for DEVICE versions: -#include "tnlNarrowBand_CUDA.h" -#include "narrowBandConfig.h" -#include <solvers/tnlBuildConfigTags.h> - -#include <mesh/tnlGrid.h> -#include <core/tnlDevice.h> -#include <time.h> -#include <ctime> - -typedef MainBuildConfig BuildConfig; - -int main( int argc, char* argv[] ) -{ - time_t start; - time_t stop; - time(&start); - std::clock_t start2= std::clock(); - Config::ParameterContainer parameters; - tnlConfigDescription configDescription; - narrowBandConfig< BuildConfig >::configSetup( configDescription ); - - if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) - return false; - - const int& dim = parameters.getParameter< int >( "dim" ); - - if(dim == 2) - { - tnlNarrowBand<tnlGrid<2,double,TNL::Devices::Host, int>, double, int> solver; - if(!solver.init(parameters)) - { - cerr << "Solver failed to initialize." <<std::endl; - return EXIT_FAILURE; - } - TNL_CHECK_CUDA_DEVICE; - std::cout << "-------------------------------------------------------------" <<std::endl; - std::cout << "Starting solver..." <<std::endl; - solver.run(); - } -// else if(dim == 3) -// { -// tnlNarrowBand<tnlGrid<3,double,TNL::Devices::Host, int>, double, int> solver; -// if(!solver.init(parameters)) -// { -// cerr << "Solver failed to initialize." <<std::endl; -// return EXIT_FAILURE; -// } -// TNL_CHECK_CUDA_DEVICE; -// std::cout << "-------------------------------------------------------------" <<std::endl; -// std::cout << "Starting solver..." <<std::endl; -// solver.run(); -// } - else - { - std::cerr << "Unsupported number of dimensions: " << dim << "!" <<std::endl; - return EXIT_FAILURE; - } - - - time(&stop); - std::cout << "Solver stopped..." <<std::endl; - std::cout <<std::endl; - std::cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) <<std::endl; - return EXIT_SUCCESS; -} - - diff --git a/src/TNL/Legacy/narrow-band/narrowBandConfig.h b/src/TNL/Legacy/narrow-band/narrowBandConfig.h deleted file mode 100644 index bab58ceac4..0000000000 --- a/src/TNL/Legacy/narrow-band/narrowBandConfig.h +++ /dev/null @@ -1,40 +0,0 @@ -/*************************************************************************** - narrowBandConfig.h - description - ------------------- - begin : Oct 15, 2015 - copyright : (C) 2015 by Tomas Sobotik - email : - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef NARROWBANDCONFIG_H_ -#define NARROWBANDCONFIG_H_ - -#include <config/tnlConfigDescription.h> - -template< typename ConfigTag > -class narrowBandConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Narrow Band Solver solver settings:" ); - config.addEntry < String > ( "problem-name", "This defines particular problem.", "fast-sweeping" ); - config.addRequiredEntry < String > ( "initial-condition", "Initial condition for solver"); - config.addRequiredEntry < int > ( "dim", "Dimension of problem."); - config.addRequiredEntry < double > ( "tau", "Time step."); - config.addRequiredEntry < double > ( "final-time", "Final time."); - config.addEntry < String > ( "mesh", "Name of mesh.", "mesh.tnl" ); - config.addEntry < String > ( "exact-input", "Are the function values near the curve equal to the SDF? (yes/no)", "no" ); - } -}; - -#endif /* NARROWBANDCONFIG_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand.h deleted file mode 100644 index 7d3d19bc03..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand.h +++ /dev/null @@ -1,186 +0,0 @@ -/*************************************************************************** - tnlNarrowBand.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND_H_ -#define TNLNARROWBAND_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <functions/tnlMeshFunction.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> -#include <limits.h> -#include <core/tnlDevice.h> -#include <ctime> -#ifdef HAVE_OPENMP -#include <omp.h> -#endif - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlNarrowBand -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - tnlNarrowBand(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - - void setupSquare1000(Index i, Index j); - void setupSquare1100(Index i, Index j); - void setupSquare1010(Index i, Index j); - void setupSquare1001(Index i, Index j); - void setupSquare1110(Index i, Index j); - void setupSquare1101(Index i, Index j); - void setupSquare1011(Index i, Index j); - void setupSquare1111(Index i, Index j); - void setupSquare0000(Index i, Index j); - void setupSquare0100(Index i, Index j); - void setupSquare0010(Index i, Index j); - void setupSquare0001(Index i, Index j); - void setupSquare0110(Index i, Index j); - void setupSquare0101(Index i, Index j); - void setupSquare0011(Index i, Index j); - void setupSquare0111(Index i, Index j); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; - - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlNarrowBand(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - - bool initGrid(); - bool run(); - - //for single core version use this implementation: - void updateValue(const Index i, const Index j, const Index k); - //for parallel version use this one instead: -// void updateValue(const Index i, const Index j, DofVectorType* grid); - - Real fabsMin(const Real x, const Real y); - - -protected: - - MeshType Mesh; - - bool exactInput; - - - tnlMeshFunction<MeshType> dofVector, dofVector2; - DofVectorType data; - - RealType h; - - tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage > Entity; - -#ifdef HAVE_OPENMP -// omp_lock_t* gridLock; -#endif - - -}; - - - //for single core version use this implementation: -#include "tnlNarrowBand2D_impl.h" - //for parallel version use this one instead: -// #include "tnlNarrowBand2D_openMP_impl.h" - -#include "tnlNarrowBand3D_impl.h" - -#endif /* TNLNARROWBAND_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h deleted file mode 100644 index dff0b48c8d..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v4_impl.h +++ /dev/null @@ -1,1317 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND2D_IMPL_H_ -#define TNLNARROWBAND2D_IMPL_H_ - -#define NARROWBAND_SUBGRID_SIZE 32 - -#include "tnlNarrowBand.h" - -#ifdef HAVE_CUDA -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} -#endif - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const -{ - if(arg > 0.0) - return arg; - return 0.0; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const -{ - if(arg < 0.0) - return -arg; - return 0.0; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:dofVector(Mesh) -{ -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - tau = parameters.getParameter< double >( "tau" ); - - finalTime = parameters.getParameter< double >( "final-time" ); - - statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaStatusVector), statusGridSize*statusGridSize*sizeof(int)); -// cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice); - - cudaMalloc(&reinitialize, sizeof(int)); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#endif - - int n = Mesh.getDimensions().x(); - - dim3 threadsPerBlock2(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocks2(statusGridSize ,statusGridSize); - initSetupGridCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<<numBlocks2,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - /*dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1);*/ - initCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - cout << "Solver initialized." <<std::endl; - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlockFS(1, 512); - dim3 numBlocksFS(4,1); - dim3 threadsPerBlockNB(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocksNB(n/NARROWBAND_SUBGRID_SIZE + 1,n/NARROWBAND_SUBGRID_SIZE + 1); - - double time = 0.0; - int reinit = 0; - - cout << "Hi!" <<std::endl; - runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cout << "Hi2!" <<std::endl; - while(time < finalTime) - { - if(tau+time > finalTime) - tau=finalTime-time; - - runNarrowBandCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver,tau); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - time += tau; - - - cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - if(reinit != 0 /*&& time != finalTime */) - { - cout << time <<std::endl; - - initSetupGridCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<<numBlocksNB,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - } - } - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, - // 8 - to the east of curve, 16 - to the west of curve. - int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * statusGridSize; - if(cudaStatusVector[subgridID] != 0 && i<Mesh.getDimensions().x() && j < Mesh.getDimensions().y()) - { - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9))*/ ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17))*/ ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 /*|| (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3))*/ ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5)) */) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - } - -} - - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - if(abs(cudaDofVector2[gid]) > 1.5*h) - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector2[gid]); - -// if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) -// { -// if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid+1] = sign(cudaDofVector2[gid+1])*0.5*h; -// } -// if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid+Mesh.getDimensions().x()] = sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; -// } -// -// if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid-1] = sign(cudaDofVector2[gid-1])*0.5*h; -// } -// if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) -// { -// cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; -// cudaDofVector2[gid-Mesh.getDimensions().x()] = sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; -// } -// } - - -// - - - - - - -// if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) -// { -// if(cudaDofVector[Entity.getIndex()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - -} - - - - -__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - __shared__ double u0; - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - if(threadIdx.x+threadIdx.y == 0) - { -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - if(blockIdx.x+blockIdx.y == 0) - *(solver->reinitialize) = 0; - - solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] = 0; - - u0 = solver->cudaDofVector2[(blockDim.y*blockIdx.y + 0)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + 0]; - } - __syncthreads(); - - double u = solver->cudaDofVector2[(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x]; - - if(u*u0 <=0.0) - atomicMax(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y]),1); - } -// if(threadIdx.x+threadIdx.y == 0) - -// printf("Bye from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - -} - - - -// run this with one thread per block -__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ -// printf("Hello\n"); - if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1) - { -// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, -// 8 - to the east of curve, 16 - to the west of curve. - if(blockIdx.x > 0) - { - atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16); - } - - if(blockIdx.x < gridDim.x - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8); - - if(blockIdx.y > 0 ) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y - 1)]), 4); - - if(blockIdx.y < gridDim.y - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y + 1)]), 2); - } - - -} - - - - - -__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau) -{ - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x; - int i = threadIdx.x + blockIdx.x*blockDim.x; - int j = threadIdx.y + blockIdx.y*blockDim.y; - -// if(i+j == 0) -// printf("Hello\n"); - - int blockID = blockIdx.x + blockIdx.y*gridDim.x; /*i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);*/ - - int status = solver->cudaStatusVector[blockID]; - - if(solver->Mesh.getDimensions().x() > i && solver->Mesh.getDimensions().y() > j) - { - - if(status != 0) - { - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh); - Entity.setCoordinates(Containers::StaticVector<2,double>(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - double value = solver->cudaDofVector2[Entity.getIndex()]; - double xf,xb,yf,yb, grad, fu, a,b; - a = b = 0.0; - - if( i == 0 || (threadIdx.x == 0 && !(status & 9)) ) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - else if( i == solver->Mesh.getDimensions().x() - 1 || (threadIdx.x == blockDim.x - 1 && !(status & 17)) ) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()] - value; - } - else - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - - if( j == 0 || (threadIdx.y == 0 && !(status & 3)) ) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - else if( j == solver->Mesh.getDimensions().y() - 1 || (threadIdx.y == blockDim.y - 1 && !(status & 5)) ) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()] - value; - } - else - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - __syncthreads(); - - - - - - if(sign(value) >= 0.0) - { - xf = solver->negativePart(xf); - - xb = solver->positivePart(xb); - - yf = solver->negativePart(yf); - - yb = solver->positivePart(yb); - - } - else - { - - xb = solver->negativePart(xb); - - xf = solver->positivePart(xf); - - yb = solver->negativePart(yb); - - yf = solver->positivePart(yf); - } - - - if(xb > xf) - a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - else - a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - if(yb > yf) - b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - else - b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - - - -// grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - grad = sqrt(/*0.5 **/ (a*a + b*b ) ); - - fu = -1.0 * grad; - - if((tau*fu+value)*value <=0 ) - { - // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, - // 8 - to the east of curve, 16 - to the west of curve. - - if((threadIdx.x == 6 && !(status & 9)) && (blockIdx.x > 0) ) - atomicMax(solver->reinitialize,1); - else if((threadIdx.x == blockDim.x - 7 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) - atomicMax(solver->reinitialize,1); - else if((threadIdx.y == 6 && !(status & 3)) && (blockIdx.y > 0) ) - atomicMax(solver->reinitialize,1); - else if((threadIdx.y == blockDim.y - 7 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) - atomicMax(solver->reinitialize,1); - } - - solver->cudaDofVector2[Entity.getIndex()] += tau*fu; - } - } -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h deleted file mode 100644 index c928104900..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_CUDA_v5_impl.h +++ /dev/null @@ -1,1313 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND2D_IMPL_H_ -#define TNLNARROWBAND2D_IMPL_H_ - -#define NARROWBAND_SUBGRID_SIZE 32 - -#include "tnlNarrowBand.h" - -__device__ -double fabsMin( double x, double y) -{ - double fx = abs(x); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; -} - -__device__ -double atomicFabsMin(double* address, double val) -{ - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); - } while (assumed != old); - return __longlong_as_double(old); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const -{ - if(arg > 0.0) - return arg; - return 0.0; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const -{ - if(arg < 0.0) - return -arg; - return 0.0; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:dofVector(Mesh) -{ -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - //Entity.refresh(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - tau = parameters.getParameter< double >( "tau" ); - - finalTime = parameters.getParameter< double >( "final-time" ); - - statusGridSize = ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaStatusVector), statusGridSize*statusGridSize*sizeof(int)); -// cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), statusGridSize*statusGridSize* sizeof(int)), cudaMemcpyHostToDevice); - - cudaMalloc(&reinitialize, sizeof(int)); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#endif - - int n = Mesh.getDimensions().x(); - - dim3 threadsPerBlock2(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocks2(statusGridSize ,statusGridSize); - initSetupGridCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<<numBlocks2,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - /*dim3 threadsPerBlock(16, 16); - dim3 numBlocks(n/16 + 1 ,n/16 +1);*/ - initCUDA<<<numBlocks2,threadsPerBlock2>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - - cout << "Solver initialized." <<std::endl; - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlockFS(1, 512); - dim3 numBlocksFS(4,1); - dim3 threadsPerBlockNB(NARROWBAND_SUBGRID_SIZE, NARROWBAND_SUBGRID_SIZE); - dim3 numBlocksNB(n/NARROWBAND_SUBGRID_SIZE + 1,n/NARROWBAND_SUBGRID_SIZE + 1); - - double time = 0.0; - int reinit = 0; - - cout << "Hi!" <<std::endl; - runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - cout << "Hi2!" <<std::endl; - while(time < finalTime) - { - if(tau+time > finalTime) - tau=finalTime-time; - - runNarrowBandCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver,tau); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - time += tau; - - - cudaMemcpy(&reinit, this->reinitialize, sizeof(int), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - if(reinit != 0 /*&& time != finalTime */) - { - cout << time <<std::endl; - - initSetupGridCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initSetupGrid2CUDA<<<numBlocksNB,1>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<<numBlocksNB,threadsPerBlockNB>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - runCUDA<<<numBlocksFS,threadsPerBlockFS>>>(this->cudaSolver,0,0); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - } - } - - //data.setLike(dofVector.getData()); - //cudaMemcpy(data.getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaMemcpy(dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - //data.save("u-00001.tnl"); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, - // 8 - to the east of curve, 16 - to the west of curve. - int subgridID = i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE); - if(/*cudaStatusVector[subgridID] != 0 &&*/ i<Mesh.getDimensions().x() && Mesh.getDimensions().y()) - { - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 /*|| (i/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 9)) */) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 /*|| (i/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 17)) */) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0/* || (j/NARROWBAND_SUBGRID_SIZE == 0 && !(cudaStatusVector[subgridID] & 3)) */) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 /* || (j/NARROWBAND_SUBGRID_SIZE == NARROWBAND_SUBGRID_SIZE - 1 && !(cudaStatusVector[subgridID] & 5))*/ ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(abs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - // cudaDofVector2[Entity.getIndex()] = fabsMin(value, tmp); - atomicFabsMin(&(cudaDofVector2[Entity.getIndex()]), tmp); - } - -} - - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - - - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - solver->initGrid(); - } - - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - int i = threadIdx.x + blockDim.x*blockIdx.x; - int j = blockDim.y*blockIdx.y + threadIdx.y; - - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - int gid = Entity.getIndex(); - - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector2[gid]); - - if (i >0 && j > 0 && i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y()) - { - if(cudaDofVector2[gid]*cudaDofVector2[gid+1] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid+1] = sign(cudaDofVector2[gid+1])*0.5*h; - } - if( cudaDofVector2[gid]*cudaDofVector2[gid+Mesh.getDimensions().x()] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid+Mesh.getDimensions().x()] = sign(cudaDofVector2[gid+Mesh.getDimensions().x()])*0.5*h; - } - - if(cudaDofVector2[gid]*cudaDofVector2[gid-1] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid-1] = sign(cudaDofVector2[gid-1])*0.5*h; - } - if( cudaDofVector2[gid]*cudaDofVector2[gid-Mesh.getDimensions().x()] <= 0 ) - { - cudaDofVector2[gid] = sign(cudaDofVector2[gid])*0.5*h; - cudaDofVector2[gid-Mesh.getDimensions().x()] = sign(cudaDofVector2[gid-Mesh.getDimensions().x()])*0.5*h; - } - } - - -// - - - - - - -// if(i+1 < Mesh.getDimensions().x() && j+1 < Mesh.getDimensions().y() ) -// { -// if(cudaDofVector[Entity.getIndex()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1111(i,j); -// else -// setupSquare1110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1101(i,j); -// else -// setupSquare1100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1011(i,j); -// else -// setupSquare1010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare1001(i,j); -// else -// setupSquare1000(i,j); -// } -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0111(i,j); -// else -// setupSquare0110(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0101(i,j); -// else -// setupSquare0100(i,j); -// } -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0011(i,j); -// else -// setupSquare0010(i,j); -// } -// else -// { -// if(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) -// setupSquare0001(i,j); -// else -// setupSquare0000(i,j); -// } -// } -// } -// -// } - - return true; - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - //Real fy = abs(y); - - //Real tmpMin = Min(fx,abs(y)); - - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - - int gx = 0; - int gy = threadIdx.y; - //if(solver->Mesh.getDimensions().x() <= gx || solver->Mesh.getDimensions().y() <= gy) - // return; - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - //int gid = solver->Mesh.getDimensions().x() * gy + gx; - //int max = solver->Mesh.getDimensions().x()*solver->Mesh.getDimensions().x(); - - //int id1 = gx+gy; - //int id2 = (solver->Mesh.getDimensions().x() - gx - 1) + gy; - - if(blockIdx.x==0) - { - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==2) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - else if(blockIdx.x==3) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - -} - - - - -__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ - __shared__ double u0; - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy) - { - -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - if(threadIdx.x+threadIdx.y == 0) - { -// printf("Hello from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - if(blockIdx.x+blockIdx.y == 0) - *(solver->reinitialize) = 0; - - solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] = 0; - - u0 = solver->cudaDofVector2[(blockDim.y*blockIdx.y + 0)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + 0]; - } - __syncthreads(); - - double u = solver->cudaDofVector2[(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x]; - - if(u*u0 <=0.0) - atomicMax(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y]),1); - } -// if(threadIdx.x+threadIdx.y == 0) - -// printf("Bye from block = %d, thread = %d, x = %d, y = %d\n", blockIdx.x + gridDim.x*blockIdx.y,(blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x() + blockDim.x*blockIdx.x + threadIdx.x, threadIdx.x, threadIdx.y); - - -} - - - -// run this with one thread per block -__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver) -{ -// printf("Hello\n"); - if(solver->cudaStatusVector[blockIdx.x + gridDim.x*blockIdx.y] == 1) - { -// 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, -// 8 - to the east of curve, 16 - to the west of curve. - if(blockIdx.x > 0) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x - 1 + gridDim.x*blockIdx.y]), 16); - - if(blockIdx.x < gridDim.x - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + 1 + gridDim.x*blockIdx.y]), 8); - - if(blockIdx.y > 0 ) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y - 1)]), 4); - - if(blockIdx.y < gridDim.y - 1) - atomicAdd(&(solver->cudaStatusVector[blockIdx.x + gridDim.x*(blockIdx.y + 1)]), 2); - } - - -} - - - - - -__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau) -{ - int gid = (blockDim.y*blockIdx.y + threadIdx.y)*solver->Mesh.getDimensions().x()+ threadIdx.x; - int i = threadIdx.x + blockIdx.x*blockDim.x; - int j = threadIdx.y + blockIdx.y*blockDim.y; - -// if(i+j == 0) -// printf("Hello\n"); - - int blockID = blockIdx.x + blockIdx.y*gridDim.x; /*i/NARROWBAND_SUBGRID_SIZE + (j/NARROWBAND_SUBGRID_SIZE) * ((Mesh.getDimensions().x() + NARROWBAND_SUBGRID_SIZE-1 ) / NARROWBAND_SUBGRID_SIZE);*/ - - int status = solver->cudaStatusVector[blockID]; - - if(solver->Mesh.getDimensions().x() > i && solver->Mesh.getDimensions().y() > j) - { - -// if(status != 0) - { - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(solver->Mesh); - Entity.setCoordinates(Containers::StaticVector<2,double>(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - double value = solver->cudaDofVector2[Entity.getIndex()]; - double xf,xb,yf,yb, grad, fu, a,b; - a = b = 0.0; - - if( i == 0 /*|| (threadIdx.x == 0 && !(status & 9)) */) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - else if( i == solver->Mesh.getDimensions().x() - 1 /*|| (threadIdx.x == blockDim.x - 1 && !(status & 17)) */) - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()] - value; - } - else - { - xb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - xf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] - value; - } - - if( j == 0/* || (threadIdx.y == 0 && !(status & 3))*/ ) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - else if( j == solver->Mesh.getDimensions().y() - 1 /*|| (threadIdx.y == blockDim.y - 1 && !(status & 5)) */) - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()] - value; - } - else - { - yb = value - solver->cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - yf = solver-> cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] - value; - } - __syncthreads(); - - - - - - if(sign(value) > 0.0) - { - xf = solver->negativePart(xf); - - xb = solver->positivePart(xb); - - yf = solver->negativePart(yf); - - yb = solver->positivePart(yb); - - } - else - { - - xb = solver->negativePart(xb); - - xf = solver->positivePart(xf); - - yb = solver->negativePart(yb); - - yf = solver->positivePart(yf); - } - - - if(xb > xf) - a = xb*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - else - a = xf*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - if(yb > yf) - b = yb*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - else - b = yf*solver->Mesh.template getSpaceStepsProducts< 0, -1 >(); - - - -// grad = sqrt(0.5 * (xf*xf + xb*xb + yf*yf + yb*yb ) )*solver->Mesh.template getSpaceStepsProducts< -1, 0 >(); - - grad = sqrt(/*0.5 **/ (a*a + b*b ) ); - - fu = -1.0 * grad; - -// if((tau*fu+value)*value <=0 ) -// { -// // 1 - with curve, 2 - to the north of curve, 4 - to the south of curve, -// // 8 - to the east of curve, 16 - to the west of curve. -// -// if((threadIdx.x == 1 && !(status & 9)) && (blockIdx.x > 0) ) -// atomicMax(solver->reinitialize,1); -// else if((threadIdx.x == blockDim.x - 2 && !(status & 17)) && (blockIdx.x < gridDim.x - 1) ) -// atomicMax(solver->reinitialize,1); -// else if((threadIdx.y == 1 && !(status & 3)) && (blockIdx.y > 0) ) -// atomicMax(solver->reinitialize,1); -// else if((threadIdx.y == blockDim.y - 2 && !(status & 5)) && (blockIdx.y < gridDim.y - 1) ) -// atomicMax(solver->reinitialize,1); -// } - - solver->cudaDofVector2[Entity.getIndex()] += tau*fu; - } - } -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[Entity.getIndex()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - Real al,be, a,b,c,s; - al=abs(cudaDofVector[Entity.getIndex()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - cudaDofVector[Entity.getIndex()])); - - be=abs(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - cudaDofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - tnlGridEntity< tnlGrid< 2,double, TNL::Devices::Host, int >, 2, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - cudaDofVector2[Entity.getIndex()]=fabsMin(cudaDofVector[Entity.getIndex()],cudaDofVector2[Entity.getIndex()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 0, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 1 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(cudaDofVector[neighborEntities.template getEntityIndex< 1, 0 >()],cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} -#endif - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h deleted file mode 100644 index d42bc2a761..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand2D_impl.h +++ /dev/null @@ -1,927 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND2D_IMPL_H_ -#define TNLNARROWBAND2D_IMPL_H_ - -#include "tnlNarrowBand.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - dofVector2.load(initialCondition); - - h = Mesh.template getSpaceStepsProducts< 1, 0 >(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - cout << "a" <<std::endl; - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().x();i++) - { - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - for(int i = 0 ; i < Mesh.getDimensions().x()-1; i++) - { - for(int j = 0 ; j < Mesh.getDimensions().x()-1; j++) - { - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - neighborEntities.refresh(Mesh,Entity.getIndex()); - - if(dofVector[this->Entity.getIndex()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1111(i,j); - else - setupSquare1110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1101(i,j); - else - setupSquare1100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1011(i,j); - else - setupSquare1010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare1001(i,j); - else - setupSquare1000(i,j); - } - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0111(i,j); - else - setupSquare0110(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0101(i,j); - else - setupSquare0100(i,j); - } - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()] > 0) - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0011(i,j); - else - setupSquare0010(i,j); - } - else - { - if(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()] > 0) - setupSquare0001(i,j); - else - setupSquare0000(i,j); - } - } - } - - } - } - cout << "a" <<std::endl; - -// Real tmp = 0.0; -// Real ax=0.5/sqrt(2.0); -// -// if(!exactInput) -// { -// for(Index i = 0; i < Mesh.getDimensions().x()*Mesh.getDimensions().y(); i++) -// dofVector[i]=0.5*h*sign(dofVector[i]); -// } -// -// -// for(Index i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// for(Index j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// } -// -// -// -// for(int i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// Index j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int i = 1; i < Mesh.getDimensions().x()-1; i++) -// { -// Index j = Mesh.getDimensions().y() - 1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// Index i = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// for(int j = 1; j < Mesh.getDimensions().y()-1; j++) -// { -// Index i = Mesh.getDimensions().x() - 1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// -// -// if(tmp == 0.0) -// {} -// else if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp < 0.0 || -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp < 0.0 ) -// {} -// else -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// } -// -// -// Index i = Mesh.getDimensions().x() - 1; -// Index j = Mesh.getDimensions().y() - 1; -// -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i-1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// i = 0; -// j = Mesh.getDimensions().y() -1; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j-1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; -// -// -// -// j = 0; -// tmp = sign(dofVector[Mesh.getCellIndex(CoordinatesType(i,j))]); -// if(dofVector[Mesh.getCellIndex(CoordinatesType(i+1,j))]*tmp > 0.0 && -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j+1))]*tmp > 0.0) -// -// dofVector[Mesh.getCellIndex(CoordinatesType(i,j))] = tmp*INT_MAX; - - //data.setLike(dofVector2.getData()); - //data=dofVector2.getData(); - //cout << data.getType() <<std::endl; - dofVector2.save("u-00000.tnl"); - //dofVector2.getData().save("u-00000.tnl"); - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j); - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - -// data.setLike(dofVector2.getData()); -// data = dofVector2.getData(); -// cout << data.getType() <<std::endl; - dofVector2.save("u-00001.tnl"); - //dofVector2.getData().save("u-00001.tnl"); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j) -{ - - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighborEntities(Entity); - - Real value = dofVector2[Entity.getIndex()]; - Real a,b, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0 >()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1 >()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()] ); - } - - - if(fabs(a-b) >= h) - tmp = fabsMin(a,b) + sign(value)*h; - else - tmp = 0.5 * (a + b + sign(value)*sqrt(2.0 * h * h - (a - b) * (a - b) ) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); - -// if(dofVector2[Entity.getIndex()] > 1.0) -// cout << value << " " << tmp << " " << dofVector2[Entity.getIndex()] <<std::endl; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(INT_MAX,dofVector2[Entity.getIndex()]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -{ -// this->Entity.setCoordinates(CoordinatesType(i,j)); -// this->Entity.refresh(); -// auto neighborEntities = Entity.getNeighborEntities(); -// dofVector2[Entity.getIndex()]=fabsMin(-INT_MAX,dofVector2[(Entity.getIndex())]); -// dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); -// dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-INT_MAX,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[Entity.getIndex()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - a = be/al; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(-abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 1, 0 >()])); - - a = al-be; - b=1.0; - c=-al; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - Real al,be, a,b,c,s; - al=abs(dofVector[Entity.getIndex()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 0 >()]- - dofVector[Entity.getIndex()])); - - be=abs(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()]/ - (dofVector[neighborEntities.template getEntityIndex< 1, 1 >()]- - dofVector[neighborEntities.template getEntityIndex< 0, 1 >()])); - - a = al-be; - b=1.0; - c=-be; - s= h/sqrt(a*a+b*b); - - - dofVector2[Entity.getIndex()]=fabsMin(-abs(a*0+b*0+c)*s,dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(-abs(a*1+b*0+c)*s,dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(abs(a*1+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(abs(a*0+b*1+c)*s,dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); - -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -{ - this->Entity.setCoordinates(CoordinatesType(i,j)); - this->Entity.refresh(); - auto neighborEntities = Entity.getNeighborEntities(); - dofVector2[Entity.getIndex()]=fabsMin(dofVector[Entity.getIndex()],dofVector2[(Entity.getIndex())]); - dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 0, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 0, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 1 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 1 >()]); - dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]=fabsMin(dofVector[neighborEntities.template getEntityIndex< 1, 0 >()],dofVector2[neighborEntities.template getEntityIndex< 1, 0 >()]); -} - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h deleted file mode 100644 index d362f249a7..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_CUDA_impl.h +++ /dev/null @@ -1,961 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_CUDA_v4_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND3D_IMPL_H_ -#define TNLNARROWBAND3D_IMPL_H_ - -#include "tnlNarrowBand.h" - -//__device__ -//double fabsMin( double x, double y) -//{ -// double fx = abs(x); -// -// if(Min(fx,abs(y)) == fx) -// return x; -// else -// return y; -//} -// -//__device__ -//double atomicFabsMin(double* address, double val) -//{ -// unsigned long long int* address_as_ull = -// (unsigned long long int*)address; -// unsigned long long int old = *address_as_ull, assumed; -// do { -// assumed = old; -// old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(assumed,val) )); -// } while (assumed != old); -// return __longlong_as_double(old); -//} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - - this->h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - counter = 0; - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; - - -#ifdef HAVE_CUDA - - cudaMalloc(&(cudaDofVector), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - cudaMalloc(&(cudaDofVector2), this->dofVector.getData().getSize()*sizeof(double)); - cudaMemcpy(cudaDofVector2, this->dofVector.getData().getData(), this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyHostToDevice); - - - cudaMalloc(&(this->cudaSolver), sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >)); - cudaMemcpy(this->cudaSolver, this,sizeof(tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >), cudaMemcpyHostToDevice); - -#endif - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(8, 8,8); - dim3 numBlocks(n/8 + 1, n/8 +1, n/8 +1); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - initCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - return true; -} - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - int n = Mesh.getDimensions().x(); - dim3 threadsPerBlock(1, 512); - dim3 numBlocks(8,1); - - - runCUDA<<<numBlocks,threadsPerBlock>>>(this->cudaSolver,0,0); - - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; - - cudaMemcpy(this->dofVector.getData().getData(), cudaDofVector2, this->dofVector.getData().getSize()*sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - cudaFree(cudaDofVector); - cudaFree(cudaDofVector2); - cudaFree(cudaSolver); - dofVector.save("u-00001.tnl"); - cudaDeviceSynchronize(); - return true; -} - - - - -#ifdef HAVE_CUDA - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity); - Real value = cudaDofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 1, 0, 0 >()] ); - } - - if( j == 0 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()]; - else - { - b = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, -1, 0 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 1, 0 >()] ); - } - - if( k == 0 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()]; - else - { - c = fabsMin( cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, -1 >()], - cudaDofVector2[neighborEntities.template getEntityIndex< 0, 0, 1 >()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a + b*b + c*c - a*b - a*c - b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - atomicFabsMin(&cudaDofVector2[Entity.getIndex()],tmp); - -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid(int i, int j, int k) -{ - tnlGridEntity< tnlGrid< 3,double, TNL::Devices::Host, int >, 3, tnlGridEntityNoStencilStorage > Entity(Mesh); - Entity.setCoordinates(CoordinatesType(i,j,k)); - Entity.refresh(); - int gid = Entity.getIndex(); - - if(abs(cudaDofVector[gid]) < 1.8*h) - cudaDofVector2[gid] = cudaDofVector[gid]; - else - cudaDofVector2[gid] = INT_MAX*sign(cudaDofVector[gid]); - - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -__device__ -Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = abs(x); - if(Min(fx,abs(y)) == fx) - return x; - else - return y; - - -} - - - -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i) -{ - - int gx = 0; - int gy = threadIdx.y; - - int n = solver->Mesh.getDimensions().x(); - int blockCount = n/blockDim.y +1; - - if(blockIdx.x==0) - { - for(int gz = 0; gz < n;gz++) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - __syncthreads(); - } - } - else if(blockIdx.x==1) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==2) - { - - for(int gz = 0; gz < n;gz++) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==3) - { - for(int gz = 0; gz < n;gz++) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - - else if(blockIdx.x==4) - { - for(int gz = n-1; gz > -1;gz--) - { - gx = 0; - gy = threadIdx.y; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==5) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=threadIdx.y; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy < n) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy+=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==6) - { - - for(int gz = n-1; gz > -1;gz--) - { - gx=0; - gy=n-threadIdx.y-1; - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx++; - if(gx==n) - { - gx=0; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - else if(blockIdx.x==7) - { - for(int gz = n-1; gz > -1;gz--) - { - gx=n-1; - gy=n-threadIdx.y-1; - - for(int k = 0; k < n*blockCount + blockDim.y; k++) - { - if(threadIdx.y < k+1 && gy > -1) - { - solver->updateValue(gx,gy,gz); - gx--; - if(gx==-1) - { - gx=n-1; - gy-=blockDim.y; - } - } - - - __syncthreads(); - } - } - } - - - - -} - - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver) -{ - int gx = threadIdx.x + blockDim.x*blockIdx.x; - int gy = blockDim.y*blockIdx.y + threadIdx.y; - int gz = blockDim.z*blockIdx.z + threadIdx.z; - - if(solver->Mesh.getDimensions().x() > gx && solver->Mesh.getDimensions().y() > gy && solver->Mesh.getDimensions().z() > gz) - { - solver->initGrid(gx,gy,gz); - } - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(-INT_MAX,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-INT_MAX,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0111( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1000( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// a = be/al; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1100( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1010( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(-abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare1001( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -// -// -// -// -// -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0011( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)])); -// -// a = al-be; -// b=1.0; -// c=-al; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0101( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// Real al,be, a,b,c,s; -// al=abs(cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,0>(index)])); -// -// be=abs(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)]/ -// (cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)]- -// cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)])); -// -// a = al-be; -// b=1.0; -// c=-be; -// s= h/sqrt(a*a+b*b); -// -// -// cudaDofVector2[index]=fabsMin(-abs(a*0+b*0+c)*s,cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(-abs(a*1+b*0+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(abs(a*1+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(abs(a*0+b*1+c)*s,cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -// -//} -// -//template< typename MeshReal, -// typename Device, -// typename MeshIndex, -// typename Real, -// typename Index > -//void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: setupSquare0110( Index i, Index j) -//{ -// Index index = Mesh.getCellIndex(CoordinatesType(i,j)); -// cudaDofVector2[index]=fabsMin(cudaDofVector[index],cudaDofVector2[(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<0,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<0,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,1>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,1>(index)]); -// cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]=fabsMin(cudaDofVector[Mesh.template getCellNextToCell<1,0>(index)],cudaDofVector2[Mesh.template getCellNextToCell<1,0>(index)]); -//} -#endif - - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h deleted file mode 100644 index 6e63d527b9..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand3D_impl.h +++ /dev/null @@ -1,307 +0,0 @@ -/*************************************************************************** - tnlNarrowBand2D_impl.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND3D_IMPL_H_ -#define TNLNARROWBAND3D_IMPL_H_ - -#include "tnlNarrowBand.h" - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: getType() -{ - return String( "tnlNarrowBand< " ) + - MeshType::getType() + ", " + - ::getType< Real >() + ", " + - ::getType< Index >() + " >"; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: tnlNarrowBand() -:Entity(Mesh), - dofVector(Mesh), - dofVector2(Mesh) -{ -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: init( const Config::ParameterContainer& parameters ) -{ - const String& meshFile = parameters.getParameter< String >( "mesh" ); - - if( ! Mesh.load( meshFile ) ) - { - std::cerr << "I am not able to load the mesh from the file " << meshFile << "." <<std::endl; - return false; - } - - - const String& initialCondition = parameters.getParameter <String>("initial-condition"); - if( ! dofVector.load( initialCondition ) ) - { - std::cerr << "I am not able to load the initial condition from the file " << meshFile << "." <<std::endl; - return false; - } - dofVector2.load(initialCondition); - - h = Mesh.template getSpaceStepsProducts< 1, 0, 0 >(); - Entity.refresh(); - - const String& exact_input = parameters.getParameter< String >( "exact-input" ); - - if(exact_input == "no") - exactInput=false; - else - exactInput=true; -// cout << "bla "<<endl; - return initGrid(); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: initGrid() -{ - for(int i=0; i< Mesh.getDimensions().x()*Mesh.getDimensions().y()*Mesh.getDimensions().z();i++) - { - - if (abs(dofVector[i]) < 1.8*h) - dofVector2[i]=dofVector[i]; - else - dofVector2[i]=INT_MAX*sign(dofVector[i]); - } - - return true; -} - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -bool tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: run() -{ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = 0; k < Mesh.getDimensions().z(); k++) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - - - - - - - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = 0; j < Mesh.getDimensions().y(); j++) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = Mesh.getDimensions().x() - 1; i > -1; i--) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - for(Index k = Mesh.getDimensions().z() -1; k > -1; k--) - { - for(Index i = 0; i < Mesh.getDimensions().x(); i++) - { - for(Index j = Mesh.getDimensions().y() - 1; j > -1; j--) - { - updateValue(i,j,k); - } - } - } - -/*---------------------------------------------------------------------------------------------------------------------------*/ - - - dofVector2.save("u-00001.tnl"); - - cout << "bla 3"<<endl; - return true; -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -void tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: updateValue( Index i, Index j, Index k) -{ - this->Entity.setCoordinates(CoordinatesType(i,j,k)); - this->Entity.refresh(); - tnlNeighborGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighborEntities(Entity); - Real value = dofVector2[Entity.getIndex()]; - Real a,b,c, tmp; - - if( i == 0 ) - a = dofVector2[neighborEntities.template getEntityIndex< 1, 0, 0>()]; - else if( i == Mesh.getDimensions().x() - 1 ) - a = dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0 >()]; - else - { - a = fabsMin( dofVector2[neighborEntities.template getEntityIndex< -1, 0, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 1, 0, 0>()] ); - } - - if( j == 0 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()]; - else if( j == Mesh.getDimensions().y() - 1 ) - b = dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()]; - else - { - b = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, -1, 0>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 1, 0>()] ); - } - - if( k == 0 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()]; - else if( k == Mesh.getDimensions().z() - 1 ) - c = dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()]; - else - { - c = fabsMin( dofVector2[neighborEntities.template getEntityIndex< 0, 0, -1>()], - dofVector2[neighborEntities.template getEntityIndex< 0, 0, 1>()] ); - } - - Real hD = 3.0*h*h - 2.0*(a*a+b*b+c*c-a*b-a*c-b*c); - - if(hD < 0.0) - tmp = fabsMin(a,fabsMin(b,c)) + sign(value)*h; - else - tmp = (1.0/3.0) * ( a + b + c + sign(value)*sqrt(hD) ); - - - dofVector2[Entity.getIndex()] = fabsMin(value, tmp); -} - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -Real tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > :: fabsMin( Real x, Real y) -{ - Real fx = fabs(x); - Real fy = fabs(y); - - Real tmpMin = Min(fx,fy); - - if(tmpMin == fx) - return x; - else - return y; - -} - - - -#endif /* TNLNARROWBAND_IMPL_H_ */ diff --git a/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h b/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h deleted file mode 100644 index ca9b1da2cc..0000000000 --- a/src/TNL/Legacy/narrow-band/tnlNarrowBand_CUDA.h +++ /dev/null @@ -1,203 +0,0 @@ -/*************************************************************************** - tnlNarrowBand_CUDA.h - description - ------------------- - begin : Oct 15 , 2015 - copyright : (C) 2015 by Tomas Sobotik - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ -#ifndef TNLNARROWBAND_H_ -#define TNLNARROWBAND_H_ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/StaticVector.h> -#include <TNL/Devices/Host.h> -#include <mesh/tnlGrid.h> -#include <mesh/grids/tnlGridEntity.h> - -#include <functions/tnlMeshFunction.h> -#include <limits.h> -#include <core/tnlDevice.h> -#include <ctime> - - - - - -template< typename Mesh, - typename Real, - typename Index > -class tnlNarrowBand -{}; - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 2, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - tnlNarrowBand(); - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - bool run(); -#ifdef HAVE_CUDA - __device__ __host__ -#endif - RealType positivePart(const RealType arg) const; -#ifdef HAVE_CUDA - __device__ __host__ -#endif - RealType negativePart(const RealType arg) const; - -#ifdef HAVE_CUDA - __device__ bool initGrid(); - __device__ void updateValue(const Index i, const Index j); - __device__ void updateValue(const Index i, const Index j, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlNarrowBand< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int* cudaStatusVector; - int counter; - int* reinitialize; - __device__ void setupSquare1000(Index i, Index j); - __device__ void setupSquare1100(Index i, Index j); - __device__ void setupSquare1010(Index i, Index j); - __device__ void setupSquare1001(Index i, Index j); - __device__ void setupSquare1110(Index i, Index j); - __device__ void setupSquare1101(Index i, Index j); - __device__ void setupSquare1011(Index i, Index j); - __device__ void setupSquare1111(Index i, Index j); - __device__ void setupSquare0000(Index i, Index j); - __device__ void setupSquare0100(Index i, Index j); - __device__ void setupSquare0010(Index i, Index j); - __device__ void setupSquare0001(Index i, Index j); - __device__ void setupSquare0110(Index i, Index j); - __device__ void setupSquare0101(Index i, Index j); - __device__ void setupSquare0011(Index i, Index j); - __device__ void setupSquare0111(Index i, Index j); -#endif - - MeshType Mesh; - -protected: - - int statusGridSize; - bool exactInput; - - tnlMeshFunction<MeshType> dofVector; - DofVectorType data; - - - RealType h, tau, finalTime; - - -}; - - - - - - - - - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -class tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > -{ - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef tnlGrid< 3, Real, Device, Index > MeshType; - typedef TNL::Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::CoordinatesType CoordinatesType; - - - - static String getType(); - bool init( const Config::ParameterContainer& parameters ); - bool run(); - -#ifdef HAVE_CUDA - __device__ bool initGrid(int i, int j, int k); - __device__ void updateValue(const Index i, const Index j, const Index k); - __device__ void updateValue(const Index i, const Index j, const Index k, double** sharedMem, const int k3); - __device__ Real fabsMin(const Real x, const Real y); - - tnlNarrowBand< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index >* cudaSolver; - double* cudaDofVector; - double* cudaDofVector2; - int counter; -#endif - - MeshType Mesh; - -protected: - - - - bool exactInput; - - tnlMeshFunction<MeshType> dofVector; - DofVectorType data; - - RealType h; - - -}; - - - - - - - -#ifdef HAVE_CUDA -//template<int sweep_t> -__global__ void runCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); -//__global__ void runCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver, int sweep, int i); - -__global__ void initCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); - -__global__ void initSetupGridCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void initSetupGrid2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void initSetupGrid1_2CUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver); -__global__ void runNarrowBandCUDA(tnlNarrowBand< tnlGrid< 2,double, TNL::Devices::Host, int >, double, int >* solver, double tau); -//__global__ void initCUDA(tnlNarrowBand< tnlGrid< 3,double, TNL::Devices::Host, int >, double, int >* solver); -#endif - - - -#include "tnlNarrowBand2D_CUDA_v4_impl.h" -// #include "tnlNarrowBand3D_CUDA_impl.h" - -#endif /* TNLNARROWBAND_H_ */ -- GitLab