diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 983bc5784b130835a146fff6d3fa278f226bbbb1..ee2a49b1eee6efd46f81dd0cb55c60a14a0d54f6 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory( navier-stokes ) #add_subdirectory( hamilton-jacobi ) add_subdirectory( hamilton-jacobi-parallel ) add_subdirectory( fast-sweeping ) +add_subdirectory( hamilton-jacobi-parallel-map ) diff --git a/examples/hamilton-jacobi-parallel-map/CMakeLists.txt b/examples/hamilton-jacobi-parallel-map/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..6ef21c42cae891037ccdc1ad1423a3de0ab8401c --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/CMakeLists.txt @@ -0,0 +1,22 @@ +set( tnl_hamilton_jacobi_parallel_map_SOURCES +# MainBuildConfig.h +# tnlParallelMapSolver2D_impl.h +# tnlParallelMapSolver.h +# parallelMapConfig.h + main.cpp) + + +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE(hamilton-jacobi-parallel-map${debugExt} main.cu) +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE(hamilton-jacobi-parallel-map${debugExt} main.cpp) +ENDIF( BUILD_CUDA ) +target_link_libraries (hamilton-jacobi-parallel-map${debugExt} tnl${debugExt}-${tnlVersion} ) + + +INSTALL( TARGETS hamilton-jacobi-parallel-map${debugExt} + RUNTIME DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) + +#INSTALL( FILES ${tnl_hamilton_jacobi_parallel_map_SOURCES} +# DESTINATION share/tnl-${tnlVersion}/examples/hamilton-jacobi-parallel-map ) diff --git a/examples/hamilton-jacobi-parallel-map/MainBuildConfig.h b/examples/hamilton-jacobi-parallel-map/MainBuildConfig.h new file mode 100644 index 0000000000000000000000000000000000000000..1b904c0c8b1d096a72a6ee8c3cc3cd1979d164b4 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/MainBuildConfig.h @@ -0,0 +1,64 @@ +/*************************************************************************** + MainBuildConfig.h - description + ------------------- + begin : Jul 7, 2014 + copyright : (C) 2014 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef MAINBUILDCONFIG_H_ +#define MAINBUILDCONFIG_H_ + +#include <solvers/tnlBuildConfigTags.h> + +class MainBuildConfig +{ + public: + + static void print() { cerr << "MainBuildConfig" << endl; } +}; + +/**** + * Turn off support for float and long double. + */ +template<> struct tnlConfigTagReal< MainBuildConfig, float > { enum { enabled = false }; }; +template<> struct tnlConfigTagReal< MainBuildConfig, long double > { enum { enabled = false }; }; + +/**** + * Turn off support for short int and long int indexing. + */ +template<> struct tnlConfigTagIndex< MainBuildConfig, short int >{ enum { enabled = false }; }; +template<> struct tnlConfigTagIndex< MainBuildConfig, long int >{ enum { enabled = false }; }; + +/**** + * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. + */ +template< int Dimensions, typename Real, typename Device, typename Index > + struct tnlConfigTagMesh< MainBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > + { enum { enabled = tnlConfigTagDimensions< MainBuildConfig, Dimensions >::enabled && + tnlConfigTagReal< MainBuildConfig, Real >::enabled && + tnlConfigTagDevice< MainBuildConfig, Device >::enabled && + tnlConfigTagIndex< MainBuildConfig, Index >::enabled }; }; + +/**** + * Please, chose your preferred time discretisation here. + */ +template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; +template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = false}; }; +template<> struct tnlConfigTagTimeDiscretisation< MainBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; + +/**** + * Only the Runge-Kutta-Merson solver is enabled by default. + */ +template<> struct tnlConfigTagExplicitSolver< MainBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; + +#endif /* MAINBUILDCONFIG_H_ */ diff --git a/examples/hamilton-jacobi-parallel-map/Makefile~ b/examples/hamilton-jacobi-parallel-map/Makefile~ new file mode 100644 index 0000000000000000000000000000000000000000..d996b5c90703680cd0e9cf334eb70d311caaa8f3 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/Makefile~ @@ -0,0 +1,41 @@ +TNL_VERSION=0.1 +TNL_INSTALL_DIR=${HOME}/local/lib +TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION} + +TARGET = hamiltonJacobiParallelSolver +#CONFIG_FILE = $(TARGET).cfg.desc +INSTALL_DIR = ${HOME}/local +CXX = g++ +CUDA_CXX = nvcc +OMP_FLAGS = -DHAVE_OPENMP -fopenmp +CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG +LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp + +SOURCES = main.cpp +HEADERS = +OBJECTS = main.o +DIST = $(SOURCES) Makefile + +all: $(TARGET) +clean: + rm -f $(OBJECTS) + rm -f $(TARGET)-conf.h + +dist: $(DIST) + tar zcvf $(TARGET).tgz $(DIST) + +install: $(TARGET) + cp $(TARGET) $(INSTALL_DIR)/bin + cp $(CONFIG_FILE) $(INSTALL_DIR)/share + +uninstall: $(TARGET) + rm -f $(INSTALL_DIR)/bin/$(TARGET) + rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share + +$(TARGET): $(OBJECTS) + $(CUDA_CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS) + +%.o: %.cpp $(HEADERS) + $(CXX) -c -o $@ $(CXX_FLAGS) $< + + diff --git a/examples/hamilton-jacobi-parallel-map/main.cpp b/examples/hamilton-jacobi-parallel-map/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b13498e17330fae7bb00a0bdc2abcc7a19f8e7a8 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/main.cpp @@ -0,0 +1,17 @@ +/*************************************************************************** + main.cpp - description + ------------------- + begin : Jul 8 , 2014 + copyright : (C) 2014 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "main.h" diff --git a/examples/hamilton-jacobi-parallel-map/main.cu b/examples/hamilton-jacobi-parallel-map/main.cu new file mode 100644 index 0000000000000000000000000000000000000000..7101976712e153d73c5f0979b211164a36ec648d --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/main.cu @@ -0,0 +1,17 @@ +/*************************************************************************** + main.cu - description + ------------------- + begin : Mar 30 , 2015 + copyright : (C) 2015 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "main.h" diff --git a/examples/hamilton-jacobi-parallel-map/main.h b/examples/hamilton-jacobi-parallel-map/main.h new file mode 100644 index 0000000000000000000000000000000000000000..43c2048920929d96e7787c7d4e365400dd88c972 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/main.h @@ -0,0 +1,99 @@ +/*************************************************************************** + main.h - description + ------------------- + begin : Mar 22 , 2016 + copyright : (C) 2016 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "tnlParallelMapSolver.h" +#include "parallelMapConfig.h" +#include "MainBuildConfig.h" +#include <solvers/tnlBuildConfigTags.h> +#include <operators/godunov-eikonal/parallelGodunovMap.h> +#include <mesh/tnlGrid.h> +#include <core/tnlDevice.h> +#include <time.h> +#include <ctime> + +typedef MainBuildConfig BuildConfig; + +int main( int argc, char* argv[] ) +{ + time_t start; + time_t stop; + time(&start); + std::clock_t start2= std::clock(); + tnlParameterContainer parameters; + tnlConfigDescription configDescription; + parallelMapConfig< BuildConfig >::configSetup( configDescription ); + + if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) + return false; + + + tnlDeviceEnum device; + device = tnlHostDevice; + + const int& dim = parameters.getParameter< int >( "dim" ); + + if(dim == 2) + { + + typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeHost; + /*#ifdef HAVE_CUDA + typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlCuda, int>, double, int > SchemeTypeDevice; + #endif + #ifndef HAVE_CUDA*/ + typedef parallelGodunovMapScheme< tnlGrid<2,double,tnlHost, int>, double, int > SchemeTypeDevice; + /*#endif*/ + + if(device==tnlHostDevice) + { + typedef tnlHost Device; + + + tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; + if(!solver.init(parameters)) + { + cerr << "Solver failed to initialize." << endl; + return EXIT_FAILURE; + } + cout << "-------------------------------------------------------------" << endl; + cout << "Starting solver loop..." << endl; + solver.run(); + } + else if(device==tnlCudaDevice ) + { + typedef tnlCuda Device; + //typedef parallelGodunovMapScheme< tnlGrid<2,double,Device, int>, double, int > SchemeType; + + tnlParallelMapSolver<2,SchemeTypeHost,SchemeTypeDevice, Device> solver; + if(!solver.init(parameters)) + { + cerr << "Solver failed to initialize." << endl; + return EXIT_FAILURE; + } + cout << "-------------------------------------------------------------" << endl; + cout << "Starting solver loop..." << endl; + solver.run(); + } + // } + } + + + time(&stop); + cout << endl; + cout << "Running time was: " << difftime(stop,start) << " .... " << (std::clock() - start2) / (double)(CLOCKS_PER_SEC) << endl; + return EXIT_SUCCESS; +} + + diff --git a/examples/hamilton-jacobi-parallel-map/no-Makefile b/examples/hamilton-jacobi-parallel-map/no-Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bfdc1ef236ca02ecfe6bc88f81d872e9524ec621 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/no-Makefile @@ -0,0 +1,41 @@ +TNL_VERSION=0.1 +TNL_INSTALL_DIR=${HOME}/local/lib +TNL_INCLUDE_DIR=${HOME}/local/include/tnl-${TNL_VERSION} + +TARGET = hamiltonJacobiParallelSolver +#CONFIG_FILE = $(TARGET).cfg.desc +INSTALL_DIR = ${HOME}/local +CXX = g++ +CUDA_CXX = nvcc +OMP_FLAGS = -DHAVE_OPENMP -fopenmp +CXX_FLAGS = -std=gnu++0x -I$(TNL_INCLUDE_DIR) -O3 $(OMP_FLAGS) -DDEBUG +LD_FLAGS = -L$(TNL_INSTALL_DIR) -ltnl-0.1 -lgomp + +SOURCES = main.cpp +HEADERS = +OBJECTS = main.o +DIST = $(SOURCES) Makefile + +all: $(TARGET) +clean: + rm -f $(OBJECTS) + rm -f $(TARGET)-conf.h + +dist: $(DIST) + tar zcvf $(TARGET).tgz $(DIST) + +install: $(TARGET) + cp $(TARGET) $(INSTALL_DIR)/bin + cp $(CONFIG_FILE) $(INSTALL_DIR)/share + +uninstall: $(TARGET) + rm -f $(INSTALL_DIR)/bin/$(TARGET) + rm -f $(CONFIG_FILE) $(INSTALL_DIR)/share + +$(TARGET): $(OBJECTS) + $(CXX) -o $(TARGET) $(OBJECTS) $(LD_FLAGS) + +%.o: %.cpp $(HEADERS) + $(CXX) -c -o $@ $(CXX_FLAGS) $< + + diff --git a/examples/hamilton-jacobi-parallel-map/parallelMapConfig.h b/examples/hamilton-jacobi-parallel-map/parallelMapConfig.h new file mode 100644 index 0000000000000000000000000000000000000000..e67859e9d6f7707d331099425dc534da67e0b1ef --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/parallelMapConfig.h @@ -0,0 +1,47 @@ +/*************************************************************************** + parallelMapConfig.h - description + ------------------- + begin : Mar 22 , 2016 + copyright : (C) 2016 by Tomas Sobotik + email : + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ +#define HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ + +#include <config/tnlConfigDescription.h> + +template< typename ConfigTag > +class parallelMapConfig +{ + public: + static void configSetup( tnlConfigDescription& config ) + { + config.addDelimiter( "Parallel Eikonal solver settings:" ); + config.addEntry < tnlString > ( "problem-name", "This defines particular problem.", "hamilton-jacobi-parallel" ); + config.addEntry < tnlString > ( "scheme", "This defines scheme used for discretization.", "godunov" ); + config.addEntryEnum( "godunov" ); + config.addEntryEnum( "upwind" ); + config.addRequiredEntry < tnlString > ( "initial-condition", "Initial condition for solver"); + config.addRequiredEntry < tnlString > ( "map", "Gradient map for solver"); + config.addEntry < tnlString > ( "mesh", "Name of mesh.", "mesh.tnl" ); + config.addEntry < double > ( "epsilon", "This defines epsilon for smoothening of sign().", 0.0 ); + config.addEntry < double > ( "delta", " Allowed difference on subgrid boundaries", 0.0 ); + config.addRequiredEntry < double > ( "stop-time", " Final time for solver"); + config.addRequiredEntry < double > ( "initial-tau", " initial tau for solver" ); + config.addEntry < double > ( "cfl-condition", " CFL condition", 0.0 ); + config.addEntry < int > ( "subgrid-size", "Subgrid size.", 16 ); + config.addRequiredEntry < int > ( "dim", "Dimension of problem."); + } +}; + +#endif /* HAMILTONJACOBIPARALLELMAPPROBLEMCONFIG_H_ */ diff --git a/examples/hamilton-jacobi-parallel-map/run b/examples/hamilton-jacobi-parallel-map/run new file mode 100755 index 0000000000000000000000000000000000000000..3aece294a9c1189cd885acbe459dba20be713716 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/run @@ -0,0 +1,64 @@ +#!/bin/bash + +#GRID_SIZES="0897" +GRID_SIZES="0008 0015 0029 0057 0113 0225 0449" +#GRID_SIZES="1793" + +dimensions=2 + +size=2 + +time=3 + +for grid_size in $GRID_SIZES; + +do + + rm -r grid-${grid_size} + mkdir grid-${grid_size} + cd grid-${grid_size} + + tnl-grid-setup --dimensions $dimensions \ + --origin-x -1.0 \ + --origin-y -1.0 \ + --origin-z -1.0 \ + --proportions-x $size \ + --proportions-y $size \ + --proportions-z $size \ + --size-x ${grid_size} \ + --size-y ${grid_size} \ + --size-z ${grid_size} + + tnl-init --test-function sdf-para \ + --offset 0.25 \ + --output-file init.tnl \ + --final-time 0.0 \ + --snapshot-period 0.1 \ + + + tnl-init --test-function sdf-para-sdf \ + --offset 0.25 \ + --output-file sdf.tnl \ + --final-time 0.0 \ + --snapshot-period 0.1 + + hamilton-jacobi-parallel --initial-condition init.tnl \ + --cfl-condition 1.0e-1 \ + --mesh mesh.tnl \ + --initial-tau 1.0e-3 \ + --epsilon 1.0 \ + --delta 0.0 \ + --stop-time $time \ + --scheme godunov \ + --subgrid-size 8 + + tnl-diff --mesh mesh.tnl --mode sequence --input-files sdf.tnl u-00001.tnl --write-difference yes --output-file ../${grid_size}.diff + + cd .. + +done + + +./tnl-err2eoc-2.py --format txt --size $size *.diff + + diff --git a/examples/hamilton-jacobi-parallel-map/run~ b/examples/hamilton-jacobi-parallel-map/run~ new file mode 100755 index 0000000000000000000000000000000000000000..236669a39749dd09b5870242d965df65295bb6fc --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/run~ @@ -0,0 +1,64 @@ +#!/bin/bash + +#GRID_SIZES="0897" +#GRID_SIZES="0449" +GRID_SIZES="1793" + +dimensions=2 + +size=2 + +time=3 + +for grid_size in $GRID_SIZES; + +do + + rm -r grid-${grid_size} + mkdir grid-${grid_size} + cd grid-${grid_size} + + tnl-grid-setup --dimensions $dimensions \ + --origin-x -1.0 \ + --origin-y -1.0 \ + --origin-z -1.0 \ + --proportions-x $size \ + --proportions-y $size \ + --proportions-z $size \ + --size-x ${grid_size} \ + --size-y ${grid_size} \ + --size-z ${grid_size} + + tnl-init --test-function sdf-para \ + --offset 0.25 \ + --output-file init.tnl \ + --final-time 0.0 \ + --snapshot-period 0.1 \ + + + tnl-init --test-function sdf-para-sdf \ + --offset 0.25 \ + --output-file sdf.tnl \ + --final-time 0.0 \ + --snapshot-period 0.1 + + hamilton-jacobi-parallel --initial-condition init.tnl \ + --cfl-condition 1.0e-1 \ + --mesh mesh.tnl \ + --initial-tau 1.0e-3 \ + --epsilon 1.0 \ + --delta 0.0 \ + --stop-time $time \ + --scheme godunov \ + --subgrid-size 8 + + tnl-diff --mesh mesh.tnl --mode sequence --input-files sdf.tnl u-00001.tnl --write-difference yes --output-file ../${grid_size}.diff + + cd .. + +done + + +./tnl-err2eoc-2.py --format txt --size $size *.diff + + diff --git a/examples/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py b/examples/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py new file mode 100755 index 0000000000000000000000000000000000000000..f8cde3768e9b76156507e133f8bc3ecaa526fc71 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/tnl-err2eoc-2.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +import sys, string, math + +arguments = sys. argv[1:] +format = "txt" +output_file_name = "eoc-table.txt" +input_files = [] +verbose = 1 +size = 1.0 + +i = 0 +while i < len( arguments ): + if arguments[ i ] == "--format": + format = arguments[ i + 1 ] + i = i + 2 + continue + if arguments[ i ] == "--output-file": + output_file_name = arguments[ i + 1 ] + i = i + 2 + continue + if arguments[ i ] == "--verbose": + verbose = float( arguments[ i + 1 ] ) + i = i +2 + continue + if arguments[ i ] == "--size": + size = float( arguments[ i + 1 ] ) + i = i +2 + continue + input_files. append( arguments[ i ] ) + i = i + 1 + +if not verbose == 0: + print "Writing to " + output_file_name + " in " + format + "." + +h_list = [] +l1_norm_list = [] +l2_norm_list = [] +max_norm_list = [] +items = 0 + +for file_name in input_files: + if not verbose == 0: + print "Processing file " + file_name + file = open( file_name, "r" ) + + l1_max = 0.0 + l_max_max = 0.0 + file.readline(); + file.readline(); + for line in file. readlines(): + data = string. split( line ) + h_list. append( size/(float(file_name[0:len(file_name)-5] ) - 1.0) ) + l1_norm_list. append( float( data[ 1 ] ) ) + l2_norm_list. append( float( data[ 2 ] ) ) + max_norm_list. append( float( data[ 3 ] ) ) + items = items + 1 + if not verbose == 0: + print line + file. close() + +h_width = 12 +err_width = 15 +file = open( output_file_name, "w" ) +if format == "latex": + file. write( "\\begin{tabular}{|r|l|l|l|l|l|l|}\\hline\n" ) + file. write( "\\raisebox{-1ex}[0ex]{$h$}& \n" ) + file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_1\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) + file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_2\\left(\\omega_h;\left[0,T\\right]\\right)}^{h,\\tau}$}}& \n" ) + file. write( "\\multicolumn{2}{|c|}{\\raisebox{1ex}[3.5ex]{$\\left\| \\cdot \\right\\|_{L_\\infty\\left(\\omega_h;\\left[0,T\\right]\\right)}^{h,\\tau}$}}\\\\ \\cline{2-7} \n" ) + file. write( " " + string. rjust( " ", h_width ) + "&" + + string. rjust( "Error", err_width ) + "&" + + string. rjust( "{\\bf EOC}", err_width ) + "&" + + string. rjust( "Error", err_width ) + "&" + + string. rjust( "{\\bf EOC}", err_width ) + "&" + + string. rjust( "Error.", err_width ) + "&" + + string. rjust( "{\\bf EOC}", err_width ) + + "\\\\ \\hline \\hline \n") +if format == "txt": + file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) + file. write( "| h | L1 Err. | L1 EOC. | L2 Err. | L2 EOC | MAX Err. | MAX EOC |\n" ) + file. write( "+==============+================+================+================+================+================+================+\n" ) + + +i = 0 +while i < items: + if i == 0: + if format == "latex": + file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + + string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + + string. rjust( " ", err_width ) + "&"+ + string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + + string. rjust( " ", err_width ) + "&" + + string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + + string. rjust( " ", err_width ) + "\\\\\n" ) + if format == "txt": + file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + + string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + + string. rjust( " ", err_width ) + " |" + + string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + + string. rjust( " ", err_width ) + " |" + + string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + + string. rjust( " ", err_width ) + " |\n" ) + file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) + i = i + 1; + continue + if h_list[ i ] == h_list[ i - 1 ]: + print "Unable to count eoc since h[ " + \ + str( i ) + " ] = h[ " + str( i - 1 ) + \ + " ] = " + str( h_list[ i ] ) + ". \n" + file. write( " eoc error: h[ " + \ + str( i ) + " ] = h[ " + str( i - 1 ) + \ + " ] = " + str( h_list[ i ] ) + ". \n" ) + else: + h_ratio = math. log( h_list[ i ] / h_list[ i - 1 ] ) + l1_ratio = math. log( l1_norm_list[ i ] / l1_norm_list[ i - 1 ] ) + l2_ratio = math. log( l2_norm_list[ i ] / l2_norm_list[ i - 1 ] ) + max_ratio = math. log( max_norm_list[ i ] / max_norm_list[ i - 1 ] ) + if format == "latex": + file. write( " " + string. ljust( str( h_list[ i ] ), h_width ) + "&" + + string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + "&" + + string. rjust( "{\\bf " + "%.2g" % ( l1_ratio / h_ratio ) + "}", err_width ) + "&" + + string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + "&" + + string. rjust( "{\\bf " + "%.2g" % ( l2_ratio / h_ratio ) + "}", err_width ) + "&" + + string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + "&" + + string. rjust( "{\\bf " + "%.2g" % ( max_ratio / h_ratio ) + "}", err_width ) + "\\\\\n" ) + if format == "txt": + file. write( "| " + string. ljust( str( h_list[ i ] ), h_width ) + " |" + + string. rjust( "%.2g" % l1_norm_list[ i ], err_width ) + " |" + + string. rjust( "**" + "%.2g" % ( l1_ratio / h_ratio ) + "**", err_width ) + " |" + + string. rjust( "%.2g" % l2_norm_list[ i ], err_width ) + " |" + + string. rjust( "**" + "%.2g" % ( l2_ratio / h_ratio ) + "**", err_width ) + " |" + + string. rjust( "%.2g" % max_norm_list[ i ], err_width ) + " |" + + string. rjust( "**" + "%.2g" % ( max_ratio / h_ratio ) + "**", err_width ) + " |\n" ) + file. write( "+--------------+----------------+----------------+----------------+----------------+----------------+----------------+\n" ) + i = i + 1 + +if format == "latex": + file. write( "\\hline \n" ) + file. write( "\\end{tabular} \n" ) + diff --git a/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h new file mode 100644 index 0000000000000000000000000000000000000000..5c3c5ca2fd6f3cc1a8ea34431d66d1742bc1554c --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h @@ -0,0 +1,365 @@ +/*************************************************************************** + tnlParallelMapSolver.h - description + ------------------- + begin : Mar 22 , 2016 + copyright : (C) 2016 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef TNLPARALLELMAPSOLVER_H_ +#define TNLPARALLELMAPSOLVER_H_ + +#include <config/tnlParameterContainer.h> +#include <core/vectors/tnlVector.h> +#include <core/vectors/tnlStaticVector.h> +#include <functions/tnlMeshFunction.h> +#include <core/tnlHost.h> +#include <mesh/tnlGrid.h> +#include <mesh/grids/tnlGridEntity.h> +#include <limits.h> +#include <core/tnlDevice.h> + + +#include <ctime> + +#ifdef HAVE_CUDA +#include <cuda.h> +#include <core/tnlCuda.h> +#endif + + +template< int Dimension, + typename SchemeHost, + typename SchemeDevice, + typename Device, + typename RealType = double, + typename IndexType = int > +class tnlParallelMapSolver +{}; + +template<typename SchemeHost, typename SchemeDevice, typename Device> +class tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int > +{ +public: + + typedef SchemeDevice SchemeTypeDevice; + typedef SchemeHost SchemeTypeHost; + typedef Device DeviceType; + typedef tnlVector< double, tnlHost, int > VectorType; + typedef tnlVector< int, tnlHost, int > IntVectorType; + typedef tnlGrid< 2, double, tnlHost, int > MeshType; +#ifdef HAVE_CUDA + typedef tnlVector< double, tnlHost, int > VectorTypeCUDA; + typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA; + typedef tnlGrid< 2, double, tnlHost, int > MeshTypeCUDA; +#endif + tnlParallelMapSolver(); + bool init( const tnlParameterContainer& parameters ); + void run(); + + void test(); + +/*private:*/ + + + void synchronize(); + + int getOwner( int i) const; + + int getSubgridValue( int i ) const; + + void setSubgridValue( int i, int value ); + + int getBoundaryCondition( int i ) const; + + void setBoundaryCondition( int i, int value ); + + void stretchGrid(); + + void contractGrid(); + + VectorType getSubgrid( const int i ) const; + + void insertSubgrid( VectorType u, const int i ); + + VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID); + + + tnlMeshFunction<MeshType> u0; + VectorType work_u, map_stretched, map; + IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; + MeshType mesh, subMesh; + +// tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage > Entity; + + SchemeHost schemeHost; + SchemeDevice schemeDevice; + double delta, tau0, stopTime,cflCondition; + int gridRows, gridCols, gridLevels, currentStep, n; + + std::clock_t start; + double time_diff; + + + tnlDeviceEnum device; + + tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* getSelf() + { + return this; + }; + +#ifdef HAVE_CUDA + + tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; + + double* work_u_cuda; + double* map_stretched_cuda; + + int* subgridValues_cuda; + int* boundaryConditions_cuda; + int* unusedCell_cuda; + int* calculationsCount_cuda; + double* tmpw; + double* tmp_map; + //MeshTypeCUDA mesh_cuda, subMesh_cuda; + //SchemeDevice scheme_cuda; + //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda; + //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda; + + int* runcuda; + int run_host; + + + __device__ void getSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); + + __device__ void updateSubgridCUDA2D( const int i, tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); + + __device__ void insertSubgridCUDA2D( double u, const int i ); + + __device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID); + + /*__global__ void runCUDA();*/ + + //__device__ void synchronizeCUDA(); + + __device__ int getOwnerCUDA2D( int i) const; + + __device__ int getSubgridValueCUDA2D( int i ) const; + + __device__ void setSubgridValueCUDA2D( int i, int value ); + + __device__ int getBoundaryConditionCUDA2D( int i ) const; + + __device__ void setBoundaryConditionCUDA2D( int i, int value ); + + //__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); + + /*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int >* caller);*/ + +#endif + +}; + + + + + + + + template<typename SchemeHost, typename SchemeDevice, typename Device> + class tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int > + { + public: + + typedef SchemeDevice SchemeTypeDevice; + typedef SchemeHost SchemeTypeHost; + typedef Device DeviceType; + typedef tnlVector< double, tnlHost, int > VectorType; + typedef tnlVector< int, tnlHost, int > IntVectorType; + typedef tnlGrid< 3, double, tnlHost, int > MeshType; + #ifdef HAVE_CUDA + typedef tnlVector< double, tnlHost, int > VectorTypeCUDA; + typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA; + typedef tnlGrid< 3, double, tnlHost, int > MeshTypeCUDA; + #endif + tnlParallelMapSolver(); + bool init( const tnlParameterContainer& parameters ); + void run(); + + void test(); + + /*private:*/ + + + void synchronize(); + + int getOwner( int i) const; + + int getSubgridValue( int i ) const; + + void setSubgridValue( int i, int value ); + + int getBoundaryCondition( int i ) const; + + void setBoundaryCondition( int i, int value ); + + void stretchGrid(); + + void contractGrid(); + + VectorType getSubgrid( const int i ) const; + + void insertSubgrid( VectorType u, const int i ); + + VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID); + + + tnlMeshFunction<MeshType> u0; + VectorType work_u; + IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount; + MeshType mesh, subMesh; + SchemeHost schemeHost; + SchemeDevice schemeDevice; + double delta, tau0, stopTime,cflCondition; + int gridRows, gridCols, gridLevels, currentStep, n; + + std::clock_t start; + double time_diff; + + + tnlDeviceEnum device; + + tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf() + { + return this; + }; + +#ifdef HAVE_CUDA + + tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver; + + double* work_u_cuda; + + int* subgridValues_cuda; + int* boundaryConditions_cuda; + int* unusedCell_cuda; + int* calculationsCount_cuda; + double* tmpw; + //MeshTypeCUDA mesh_cuda, subMesh_cuda; + //SchemeDevice scheme_cuda; + //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda; + //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda; + + int* runcuda; + int run_host; + + + __device__ void getSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); + + __device__ void updateSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a); + + __device__ void insertSubgridCUDA3D( double u, const int i ); + + __device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID); + + /*__global__ void runCUDA();*/ + + //__device__ void synchronizeCUDA(); + + __device__ int getOwnerCUDA3D( int i) const; + + __device__ int getSubgridValueCUDA3D( int i ) const; + + __device__ void setSubgridValueCUDA3D( int i, int value ); + + __device__ int getBoundaryConditionCUDA3D( int i ) const; + + __device__ void setBoundaryConditionCUDA3D( int i, int value ); + + //__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); + + /*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int >* caller);*/ + +#endif + +}; + + + + + + +#ifdef HAVE_CUDA +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void runCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void initRunCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* caller); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void initCUDA2D( tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3, double* tmp_map_ptr); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); + + + + + + + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void runCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void initRunCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void initCUDA3D( tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void synchronizeCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ void synchronize2CUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver); +#endif + + +__device__ +double fabsMin( double x, double y) +{ + double fx = abs(x); + + if(Min(fx,abs(y)) == fx) + return x; + else + return y; +} + +__device__ +double atomicFabsMin(double* address, double val) +{ + unsigned long long int* address_as_ull = + (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed,__double_as_longlong( fabsMin(__longlong_as_double(assumed),val) )); + } while (assumed != old); + return __longlong_as_double(old); +} + + +#include "tnlParallelMapSolver2D_impl.h" +#endif /* TNLPARALLELMAPSOLVER_H_ */ diff --git a/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..cc611b950163b1a77309f54ff0a629b306fd54e5 --- /dev/null +++ b/examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver2D_impl.h @@ -0,0 +1,1957 @@ +/*************************************************************************** + tnlParallelMapSolver2D_impl.h - description + ------------------- + begin : Mar 22 , 2016 + copyright : (C) 2016 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef TNLPARALLELMAPSOLVER2D_IMPL_H_ +#define TNLPARALLELMAPSOLVER2D_IMPL_H_ + + +#include "tnlParallelMapSolver.h" +#include <core/mfilename.h> + + + + +#define MAP_SOLVER_MAX_VALUE 150 + + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::tnlParallelMapSolver() +{ + cout << "a" << endl; + this->device = tnlCudaDevice; /////////////// tnlCuda Device --- vypocet na GPU, tnlHostDevice --- vypocet na CPU + +#ifdef HAVE_CUDA + if(this->device == tnlCudaDevice) + { + run_host = 1; + } +#endif + + cout << "b" << endl; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::test() +{ +/* + for(int i =0; i < this->subgridValues.getSize(); i++ ) + { + insertSubgrid(getSubgrid(i), i); + } +*/ +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> + +bool tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::init( const tnlParameterContainer& parameters ) +{ + cout << "Initializating solver..." << endl; + const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh"); + this->mesh.load( meshLocation ); + + this->n = parameters.getParameter <int>("subgrid-size"); + cout << "Setting N to " << this->n << endl; + + this->subMesh.setDimensions( this->n, this->n ); + this->subMesh.setDomain( tnlStaticVector<2,double>(0.0, 0.0), + tnlStaticVector<2,double>(mesh.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n), mesh.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n)) ); + + this->subMesh.save("submesh.tnl"); + + const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition"); + this->u0.load( initialCondition ); + + /* LOAD MAP */ + const tnlString& mapFile = parameters.getParameter <tnlString>("map"); + if(! this->map.load( mapFile )) + cout << "Failed to load map file : " << mapFile << endl; + + //cout << this->mesh.getCellCenter(0) << endl; + + this->delta = parameters.getParameter <double>("delta"); + this->delta *= mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >(); + + cout << "Setting delta to " << this->delta << endl; + + this->tau0 = parameters.getParameter <double>("initial-tau"); + cout << "Setting initial tau to " << this->tau0 << endl; + this->stopTime = parameters.getParameter <double>("stop-time"); + + this->cflCondition = parameters.getParameter <double>("cfl-condition"); + this -> cflCondition *= sqrt(mesh.template getSpaceStepsProducts< 1, 0 >()*mesh.template getSpaceStepsProducts< 0, 1 >()); + cout << "Setting CFL to " << this->cflCondition << endl; + + stretchGrid(); + this->stopTime /= (double)(this->gridCols); + this->stopTime *= (1.0+1.0/((double)(this->n) - 2.0)); + cout << "Setting stopping time to " << this->stopTime << endl; + //this->stopTime = 1.5*((double)(this->n))*parameters.getParameter <double>("stop-time")*this->mesh.template getSpaceStepsProducts< 1, 0 >(); + //cout << "Setting stopping time to " << this->stopTime << endl; + + cout << "Initializating scheme..." << endl; + if(!this->schemeHost.init(parameters)) + { + cerr << "SchemeHost failed to initialize." << endl; + return false; + } + cout << "Scheme initialized." << endl; + + test(); + + VectorType* tmp = new VectorType[subgridValues.getSize()]; + bool containsCurve = false; + +#ifdef HAVE_CUDA + + if(this->device == tnlCudaDevice) + { + /*cout << "Testing... " << endl; + if(this->device == tnlCudaDevice) + { + if( !initCUDA2D(parameters, gridRows, gridCols) ) + return false; + }*/ + //cout << "s" << endl; + cudaMalloc(&(this->cudaSolver), sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >)); + //cout << "s" << endl; + cudaMemcpy(this->cudaSolver, this,sizeof(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >), cudaMemcpyHostToDevice); + //cout << "s" << endl; + double** tmpdev = NULL; + cudaMalloc(&tmpdev, sizeof(double*)); + //double* tmpw; + cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double)); + cudaMalloc(&(this->tmp_map), this->map_stretched.getSize()*sizeof(double)); + cudaMalloc(&(this->runcuda), sizeof(int)); + cudaDeviceSynchronize(); + checkCudaDevice; + int* tmpUC; + cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int)); + cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice); + + initCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC, tmp_map); + cudaDeviceSynchronize(); + checkCudaDevice; + //cout << "s " << endl; + //cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double)); + double* tmpu = NULL; + + cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost); + //printf("%p %p \n",tmpu,tmpw); + cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy((this->tmp_map), this->map_stretched.getData(), this->map_stretched.getSize()*sizeof(double), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + checkCudaDevice; + //cout << "s "<< endl; + + } +#endif + + if(this->device == tnlHostDevice) + { + for(int i = 0; i < this->subgridValues.getSize(); i++) + { + + if(! tmp[i].setSize(this->n * this->n)) + cout << "Could not allocate tmp["<< i <<"] array." << endl; + tmp[i] = getSubgrid(i); + containsCurve = false; + + for(int j = 0; j < tmp[i].getSize(); j++) + { + if(tmp[i][0]*tmp[i][j] <= 0.0) + { + containsCurve = true; + j=tmp[i].getSize(); + } + + } + if(containsCurve) + { + //cout << "Computing initial SDF on subgrid " << i << "." << endl; + insertSubgrid(runSubgrid(0, tmp[i],i), i); + setSubgridValue(i, 4); + //cout << "Computed initial SDF on subgrid " << i << "." << endl; + } + containsCurve = false; + + } + } +#ifdef HAVE_CUDA + else if(this->device == tnlCudaDevice) + { +// cout << "pre 1 kernel" << endl; + cudaDeviceSynchronize(); + checkCudaDevice; + dim3 threadsPerBlock(this->n, this->n); + dim3 numBlocks(this->gridCols,this->gridRows); + cudaDeviceSynchronize(); + checkCudaDevice; + initRunCUDA2D<SchemeTypeHost,SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); + cudaDeviceSynchronize(); +// cout << "post 1 kernel" << endl; + + } +#endif + + + this->currentStep = 1; + if(this->device == tnlHostDevice) + synchronize(); +#ifdef HAVE_CUDA + else if(this->device == tnlCudaDevice) + { + dim3 threadsPerBlock(this->n, this->n); + dim3 numBlocks(this->gridCols,this->gridRows); + //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); + //cout << test[0] <<" " << test[1] <<" " << test[2] <<" " << test[3] << endl; + //cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); + //cout << this->tmpw << " " << test[0] <<" " << test[1] << " " <<test[2] << " " <<test[3] << endl; + + checkCudaDevice; + + synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); + cudaDeviceSynchronize(); + checkCudaDevice; + synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); + cudaDeviceSynchronize(); + checkCudaDevice; + //cout << test[0] << " " <<test[1] <<" " << test[2] << " " <<test[3] << endl; + //cudaMemcpy(/*this->work_u.getData()*/ test, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); + //checkCudaDevice; + //cout << this->tmpw << " " << test[0] << " " <<test[1] << " " <<test[2] <<" " << test[3] << endl; + //free(test); + + } + +#endif + cout << "Solver initialized." << endl; + + return true; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::run() +{ + if(this->device == tnlHostDevice) + { + + bool end = false; + while ((this->boundaryConditions.max() > 0 ) || !end) + { + if(this->boundaryConditions.max() == 0 ) + end=true; + else + end=false; +#ifdef HAVE_OPENMP +#pragma omp parallel for num_threads(4) schedule(dynamic) +#endif + for(int i = 0; i < this->subgridValues.getSize(); i++) + { + if(getSubgridValue(i) != INT_MAX) + { + //cout << "subMesh: " << i << ", BC: " << getBoundaryCondition(i) << endl; + + if(getSubgridValue(i) == currentStep+4) + { + + if(getBoundaryCondition(i) & 1) + { + insertSubgrid( runSubgrid(1, getSubgrid(i),i), i); + this->calculationsCount[i]++; + } + if(getBoundaryCondition(i) & 2) + { + insertSubgrid( runSubgrid(2, getSubgrid(i),i), i); + this->calculationsCount[i]++; + } + if(getBoundaryCondition(i) & 4) + { + insertSubgrid( runSubgrid(4, getSubgrid(i),i), i); + this->calculationsCount[i]++; + } + if(getBoundaryCondition(i) & 8) + { + insertSubgrid( runSubgrid(8, getSubgrid(i),i), i); + this->calculationsCount[i]++; + } + } + + if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 1)//) + /* &&(!(getBoundaryCondition(i) & 5) && !(getBoundaryCondition(i) & 10)) */) + { + //cout << "3 @ " << getBoundaryCondition(i) << endl; + insertSubgrid( runSubgrid(3, getSubgrid(i),i), i); + } + if( ((getBoundaryCondition(i) & 4) )|| (getBoundaryCondition(i) & 1)//) + /* &&(!(getBoundaryCondition(i) & 3) && !(getBoundaryCondition(i) & 12)) */) + { + //cout << "5 @ " << getBoundaryCondition(i) << endl; + insertSubgrid( runSubgrid(5, getSubgrid(i),i), i); + } + if( ((getBoundaryCondition(i) & 2) )|| (getBoundaryCondition(i) & 8)//) + /* &&(!(getBoundaryCondition(i) & 12) && !(getBoundaryCondition(i) & 3))*/ ) + { + //cout << "10 @ " << getBoundaryCondition(i) << endl; + insertSubgrid( runSubgrid(10, getSubgrid(i),i), i); + } + if( ((getBoundaryCondition(i) & 4) )|| (getBoundaryCondition(i) & 8)//) + /*&&(!(getBoundaryCondition(i) & 10) && !(getBoundaryCondition(i) & 5)) */) + { + //cout << "12 @ " << getBoundaryCondition(i) << endl; + insertSubgrid( runSubgrid(12, getSubgrid(i),i), i); + } + + + /*if(getBoundaryCondition(i)) + { + insertSubgrid( runSubgrid(15, getSubgrid(i),i), i); + }*/ + + setBoundaryCondition(i, 0); + + setSubgridValue(i, getSubgridValue(i)-1); + + } + } + synchronize(); + } + } +#ifdef HAVE_CUDA + else if(this->device == tnlCudaDevice) + { + //cout << "fn" << endl; + bool end_cuda = false; + dim3 threadsPerBlock(this->n, this->n); + dim3 numBlocks(this->gridCols,this->gridRows); + cudaDeviceSynchronize(); + checkCudaDevice; + //cudaMalloc(&runcuda,sizeof(bool)); + //cudaMemcpy(runcuda, &run_host, sizeof(bool), cudaMemcpyHostToDevice); + //cout << "fn" << endl; + bool* tmpb; + //cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); + //cudaDeviceSynchronize(); + //checkCudaDevice; + cudaMemcpy(&(this->run_host),this->runcuda,sizeof(int), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + checkCudaDevice; + //cout << "fn" << endl; + int i = 1; + time_diff = 0.0; + while (run_host || !end_cuda) + { + cout << "Computing at step "<< i++ << endl; + if(run_host != 0 ) + end_cuda = true; + else + end_cuda = false; + //cout << "a" << endl; + cudaDeviceSynchronize(); + checkCudaDevice; + start = std::clock(); + runCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock,3*this->n*this->n*sizeof(double)>>>(this->cudaSolver); + //cout << "a" << endl; + cudaDeviceSynchronize(); + time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); + + //start = std::clock(); + synchronizeCUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,threadsPerBlock>>>(this->cudaSolver); + cudaDeviceSynchronize(); + checkCudaDevice; + synchronize2CUDA2D<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<numBlocks,1>>>(this->cudaSolver); + cudaDeviceSynchronize(); + checkCudaDevice; + //time_diff += (std::clock() - start) / (double)(CLOCKS_PER_SEC); + + + //cout << "a" << endl; + //run_host = false; + //cout << "in kernel loop" << run_host << endl; + //cudaMemcpy(tmpb, &(cudaSolver->runcuda),sizeof(bool*), cudaMemcpyDeviceToHost); + cudaMemcpy(&run_host, (this->runcuda),sizeof(int), cudaMemcpyDeviceToHost); + //cout << "in kernel loop" << run_host << endl; + } + cout << "Solving time was: " << time_diff << endl; + //cout << "b" << endl; + + //double* tmpu; + //cudaMemcpy(tmpu, &(cudaSolver->work_u_cuda),sizeof(double*), cudaMemcpyHostToDevice); + //cudaMemcpy(this->work_u.getData(), tmpu, this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); + //cout << this->work_u.getData()[0] << endl; + + //double * test = (double*)malloc(this->work_u.getSize()*sizeof(double)); + //cout << test[0] << test[1] << test[2] << test[3] << endl; + cudaMemcpy(this->work_u.getData()/* test*/, (this->tmpw), this->work_u.getSize()*sizeof(double), cudaMemcpyDeviceToHost); + //cout << this->tmpw << " " << test[0] << test[1] << test[2] << test[3] << endl; + //free(test); + + cudaDeviceSynchronize(); + } +#endif + contractGrid(); + this->u0.save("u-00001.tnl"); + cout << "Maximum number of calculations on one subgrid was " << this->calculationsCount.absMax() << endl; + cout << "Average number of calculations on one subgrid was " << ( (double) this->calculationsCount.sum() / (double) this->calculationsCount.getSize() ) << endl; + cout << "Solver finished" << endl; + +#ifdef HAVE_CUDA + if(this->device == tnlCudaDevice) + { + cudaFree(this->runcuda); + cudaFree(this->tmpw); + cudaFree(this->tmp_map); + cudaFree(this->cudaSolver); + } +#endif + +} + +//north - 1, east - 2, west - 4, south - 8 +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::synchronize() //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now +{ + cout << "Synchronizig..." << endl; + int tmp1, tmp2; + int grid1, grid2; + + if(this->currentStep & 1) + { + for(int j = 0; j < this->gridRows - 1; j++) + { + for (int i = 0; i < this->gridCols*this->n; i++) + { + tmp1 = this->gridCols*this->n*((this->n-1)+j*this->n) + i; + tmp2 = this->gridCols*this->n*((this->n)+j*this->n) + i; + grid1 = getSubgridValue(getOwner(tmp1)); + grid2 = getSubgridValue(getOwner(tmp2)); + if(getOwner(tmp1)==getOwner(tmp2)) + cout << "i, j" << i << "," << j << endl; + if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) + { + this->work_u[tmp2] = this->work_u[tmp1]; + this->unusedCell[tmp2] = 0; + if(grid2 == INT_MAX) + { + setSubgridValue(getOwner(tmp2), -INT_MAX); + } + if(! (getBoundaryCondition(getOwner(tmp2)) & 8) ) + setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+8); + } + else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) + { + this->work_u[tmp1] = this->work_u[tmp2]; + this->unusedCell[tmp1] = 0; + if(grid1 == INT_MAX) + { + setSubgridValue(getOwner(tmp1), -INT_MAX); + } + if(! (getBoundaryCondition(getOwner(tmp1)) & 1) ) + setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+1); + } + } + } + + } + else + { + for(int i = 1; i < this->gridCols; i++) + { + for (int j = 0; j < this->gridRows*this->n; j++) + { + tmp1 = this->gridCols*this->n*j + i*this->n - 1; + tmp2 = this->gridCols*this->n*j + i*this->n ; + grid1 = getSubgridValue(getOwner(tmp1)); + grid2 = getSubgridValue(getOwner(tmp2)); + if(getOwner(tmp1)==getOwner(tmp2)) + cout << "i, j" << i << "," << j << endl; + if ((fabs(this->work_u[tmp1]) < fabs(this->work_u[tmp2]) - this->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) + { + this->work_u[tmp2] = this->work_u[tmp1]; + this->unusedCell[tmp2] = 0; + if(grid2 == INT_MAX) + { + setSubgridValue(getOwner(tmp2), -INT_MAX); + } + if(! (getBoundaryCondition(getOwner(tmp2)) & 4) ) + setBoundaryCondition(getOwner(tmp2), getBoundaryCondition(getOwner(tmp2))+4); + } + else if ((fabs(this->work_u[tmp1]) > fabs(this->work_u[tmp2]) + this->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) + { + this->work_u[tmp1] = this->work_u[tmp2]; + this->unusedCell[tmp1] = 0; + if(grid1 == INT_MAX) + { + setSubgridValue(getOwner(tmp1), -INT_MAX); + } + if(! (getBoundaryCondition(getOwner(tmp1)) & 2) ) + setBoundaryCondition(getOwner(tmp1), getBoundaryCondition(getOwner(tmp1))+2); + } + } + } + } + + + this->currentStep++; + int stepValue = this->currentStep + 4; + for (int i = 0; i < this->subgridValues.getSize(); i++) + { + if( getSubgridValue(i) == -INT_MAX ) + setSubgridValue(i, stepValue); + } + + cout << "Grid synchronized at step " << (this->currentStep - 1 ) << endl; + +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwner(int i) const +{ + + return (i / (this->gridCols*this->n*this->n))*this->gridCols + (i % (this->gridCols*this->n))/this->n; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValue( int i ) const +{ + return this->subgridValues[i]; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValue(int i, int value) +{ + this->subgridValues[i] = value; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryCondition( int i ) const +{ + return this->boundaryConditions[i]; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryCondition(int i, int value) +{ + this->boundaryConditions[i] = value; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::stretchGrid() +{ + cout << "Stretching grid..." << endl; + + + this->gridCols = ceil( ((double)(this->mesh.getDimensions().x()-1)) / ((double)(this->n-1)) ); + this->gridRows = ceil( ((double)(this->mesh.getDimensions().y()-1)) / ((double)(this->n-1)) ); + + //this->gridCols = (this->mesh.getDimensions().x()-1) / (this->n-1) ; + //this->gridRows = (this->mesh.getDimensions().y()-1) / (this->n-1) ; + + cout << "Setting gridCols to " << this->gridCols << "." << endl; + cout << "Setting gridRows to " << this->gridRows << "." << endl; + + this->subgridValues.setSize(this->gridCols*this->gridRows); + this->subgridValues.setValue(0); + this->boundaryConditions.setSize(this->gridCols*this->gridRows); + this->boundaryConditions.setValue(0); + this->calculationsCount.setSize(this->gridCols*this->gridRows); + this->calculationsCount.setValue(0); + + for(int i = 0; i < this->subgridValues.getSize(); i++ ) + { + this->subgridValues[i] = INT_MAX; + this->boundaryConditions[i] = 0; + } + + int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; + + if(!this->work_u.setSize(stretchedSize)) + cerr << "Could not allocate memory for stretched grid." << endl; + if(!this->map_stretched.setSize(stretchedSize)) + cerr << "Could not allocate memory for stretched map." << endl; + if(!this->unusedCell.setSize(stretchedSize)) + cerr << "Could not allocate memory for supporting stretched grid." << endl; + int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); + cout << idealStretch << endl; + + for(int i = 0; i < stretchedSize; i++) + { + this->unusedCell[i] = 1; + int diff =(this->n*this->gridCols) - idealStretch ; + //cout << "diff = " << diff <<endl; + int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; + + if(i%(this->n*this->gridCols) - idealStretch >= 0) + { + //cout << i%(this->n*this->gridCols) - idealStretch +1 << endl; + k+= i%(this->n*this->gridCols) - idealStretch +1 ; + } + + if(i/(this->n*this->gridCols) - idealStretch + 1 > 0) + { + //cout << i/(this->n*this->gridCols) - idealStretch + 1 << endl; + k+= (i/(this->n*this->gridCols) - idealStretch +1 )* this->mesh.getDimensions().x() ; + } + + //cout << "i = " << i << " : i-k = " << i-k << endl; + /*int j=(i % (this->n*this->gridCols)) - ( (this->mesh.getDimensions().x() - this->n)/(this->n - 1) + this->mesh.getDimensions().x() - 1) + + (this->n*this->gridCols - this->mesh.getDimensions().x())*(i/(this->n*this->n*this->gridCols)) ; + + if(j > 0) + k += j; + + int l = i-k - (this->u0.getSize() - 1); + int m = (l % this->mesh.getDimensions().x()); + + if(l>0) + k+= l + ( (l / this->mesh.getDimensions().x()) + 1 )*this->mesh.getDimensions().x() - (l % this->mesh.getDimensions().x());*/ + + this->work_u[i] = this->u0[i-k]; +///**/cout << "bla " << i << " " << this->map_stretched.getSize() << " " << i-k << " " << this->map.getData().getSize() << endl; + this->map_stretched[i] = this->map[i-k]; +///**/cout << "bla " << i << " " << this->map_stretched.getSize() << " " << i-k << " " << this->map.getData().getSize() << endl; + //cout << (i-k) <<endl; + } + + + cout << "Grid stretched." << endl; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::contractGrid() +{ + cout << "Contracting grid..." << endl; + int stretchedSize = this->n*this->n*this->gridCols*this->gridRows; + + int idealStretch =this->mesh.getDimensions().x() + (this->mesh.getDimensions().x()-2)/(this->n-1); + cout << idealStretch << endl; + + for(int i = 0; i < stretchedSize; i++) + { + int diff =(this->n*this->gridCols) - idealStretch ; + int k = i/this->n - i/(this->n*this->gridCols) + this->mesh.getDimensions().x()*(i/(this->n*this->n*this->gridCols)) + (i/(this->n*this->gridCols))*diff; + + if((i%(this->n*this->gridCols) - idealStretch < 0) && (i/(this->n*this->gridCols) - idealStretch + 1 <= 0)) + { + //cout << i <<" : " <<i-k<< endl; + this->u0[i-k] = this->work_u[i]; + } + + } + + cout << "Grid contracted" << endl; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType +tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgrid( const int i ) const +{ + VectorType u; + u.setSize(this->n*this->n); + + for( int j = 0; j < u.getSize(); j++) + { + u[j] = this->work_u[ (i / this->gridCols) * this->n*this->n*this->gridCols + + (i % this->gridCols) * this->n + + (j/this->n) * this->n*this->gridCols + + (j % this->n) ]; + } + return u; +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgrid( VectorType u, const int i ) +{ + + for( int j = 0; j < this->n*this->n; j++) + { + int index = (i / this->gridCols)*this->n*this->n*this->gridCols + (i % this->gridCols)*this->n + (j/this->n)*this->n*this->gridCols + (j % this->n); + //OMP LOCK index + if( (fabs(this->work_u[index]) > fabs(u[j])) || (this->unusedCell[index] == 1) ) + { + this->work_u[index] = u[j]; + this->unusedCell[index] = 0; + } + //OMP UNLOCK index + } +} + +template< typename SchemeHost, typename SchemeDevice, typename Device> +typename tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::VectorType +tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgrid( int boundaryCondition, VectorType u, int subGridID) +{ + + VectorType fu; + + fu.setLike(u); + fu.setValue( 0.0 ); + +/* + * Insert Euler-Solver Here + */ + + /**/ + + /*for(int i = 0; i < u.getSize(); i++) + { + int x = this->subMesh.getCellCoordinates(i).x(); + int y = this->subMesh.getCellCoordinates(i).y(); + + if(x == 0 && (boundaryCondition & 4) && y ==0) + { + if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) + { + //cout << "x = 0; y = 0" << endl; + u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); + } + } + else if(x == 0 && (boundaryCondition & 4) && y == subMesh.getDimensions().y() - 1) + { + if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) + { + //cout << "x = 0; y = n" << endl; + u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); + } + } + + + else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y ==0) + { + if((u[subMesh.getCellYSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) + { + //cout << "x = n; y = 0" << endl; + u[i] = u[subMesh.getCellYSuccessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); + } + } + else if(x == subMesh.getDimensions().x() - 1 && (boundaryCondition & 2) && y == subMesh.getDimensions().y() - 1) + { + if((u[subMesh.getCellYPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 0, 1 >() > 1.0) + { + //cout << "x = n; y = n" << endl; + u[i] = u[subMesh.getCellYPredecessor( i )] - subMesh.template getSpaceStepsProducts< 0, 1 >(); + } + } + + + else if(y == 0 && (boundaryCondition & 8) && x ==0) + { + if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) + { + //cout << "y = 0; x = 0" << endl; + u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); + } + } + else if(y == 0 && (boundaryCondition & 8) && x == subMesh.getDimensions().x() - 1) + { + if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) + { + //cout << "y = 0; x = n" << endl; + u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); + } + } + + + else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x ==0) + { + if((u[subMesh.getCellXSuccessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) { + //cout << "y = n; x = 0" << endl; + u[i] = u[subMesh.getCellXSuccessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); + } + } + else if(y == subMesh.getDimensions().y() - 1 && (boundaryCondition & 1) && x == subMesh.getDimensions().x() - 1) + { + if((u[subMesh.getCellXPredecessor( i )] - u[i])/subMesh.template getSpaceStepsProducts< 1, 0 >() > 1.0) + { + //cout << "y = n; x = n" << endl; + u[i] = u[subMesh.getCellXPredecessor( i )] - subMesh.template getSpaceStepsProducts< 1, 0 >(); + } + } + }*/ + + /**/ + + +/* bool tmp = false; + for(int i = 0; i < u.getSize(); i++) + { + if(u[0]*u[i] <= 0.0) + tmp=true; + } + + + if(tmp) + {} + else if(boundaryCondition == 4) + { + int i; + for(i = 0; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) + { + int j; + for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } + int j; + for(j = i; j < subMesh.getDimensions().x() - 1; j=subMesh.getCellXSuccessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } + else if(boundaryCondition == 8) + { + int i; + for(i = 0; i < subMesh.getDimensions().x() - 1; i=subMesh.getCellXSuccessor(i)) + { + int j; + for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } + int j; + for(j = i; j < u.getSize() - subMesh.getDimensions().x(); j=subMesh.getCellYSuccessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + + } + else if(boundaryCondition == 2) + { + int i; + for(i = subMesh.getDimensions().x() - 1; i < u.getSize() - subMesh.getDimensions().x() ; i=subMesh.getCellYSuccessor(i)) + { + int j; + for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } + int j; + for(j = i; j > (i-1)*subMesh.getDimensions().x(); j=subMesh.getCellXPredecessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } + else if(boundaryCondition == 1) + { + int i; + for(i = (subMesh.getDimensions().y() - 1)*subMesh.getDimensions().x(); i < u.getSize() - 1; i=subMesh.getCellXSuccessor(i)) + { + int j; + for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } + int j; + for(j = i; j >=subMesh.getDimensions().x(); j=subMesh.getCellYPredecessor(j)) + { + u[j] = u[i]; + } + u[j] = u[i]; + } +*/ + /**/ + + + + bool tmp = false; + for(int i = 0; i < u.getSize(); i++) + { + if(u[0]*u[i] <= 0.0) + tmp=true; + int centreGID = (this->n*(subGridID / this->gridRows)+ (this->n >> 1))*(this->n*this->gridCols) + this->n*(subGridID % this->gridRows) + (this->n >> 1); + if(this->unusedCell[centreGID] == 0 || boundaryCondition == 0) + tmp = true; + } + //if(this->currentStep + 3 < getSubgridValue(subGridID)) + //tmp = true; + + + double value = Sign(u[0]) * u.absMax(); + + if(tmp) + {} + + + //north - 1, east - 2, west - 4, south - 8 + else if(boundaryCondition == 4) + { + for(int i = 0; i < this->n; i++) + for(int j = 1;j < this->n; j++) + //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) + u[i*this->n + j] = value;// u[i*this->n]; + } + else if(boundaryCondition == 2) + { + for(int i = 0; i < this->n; i++) + for(int j =0 ;j < this->n -1; j++) + //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) + u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; + } + else if(boundaryCondition == 1) + { + for(int j = 0; j < this->n; j++) + for(int i = 0;i < this->n - 1; i++) + //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) + u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; + } + else if(boundaryCondition == 8) + { + for(int j = 0; j < this->n; j++) + for(int i = 1;i < this->n; i++) + //if(fabs(u[i*this->n + j]) < fabs(u[j])) + u[i*this->n + j] = value;// u[j]; + } + +/* + + else if(boundaryCondition == 5) + { + for(int i = 0; i < this->n - 1; i++) + for(int j = 1;j < this->n; j++) + //if(fabs(u[i*this->n + j]) < fabs(u[i*this->n])) + u[i*this->n + j] = value;// u[i*this->n]; + } + else if(boundaryCondition == 10) + { + for(int i = 1; i < this->n; i++) + for(int j =0 ;j < this->n -1; j++) + //if(fabs(u[i*this->n + j]) < fabs(u[(i+1)*this->n - 1])) + u[i*this->n + j] = value;// u[(i+1)*this->n - 1]; + } + else if(boundaryCondition == 3) + { + for(int j = 0; j < this->n - 1; j++) + for(int i = 0;i < this->n - 1; i++) + //if(fabs(u[i*this->n + j]) < fabs(u[j + this->n*(this->n - 1)])) + u[i*this->n + j] = value;// u[j + this->n*(this->n - 1)]; + } + else if(boundaryCondition == 12) + { + for(int j = 1; j < this->n; j++) + for(int i = 1;i < this->n; i++) + //if(fabs(u[i*this->n + j]) < fabs(u[j])) + u[i*this->n + j] = value;// u[j]; + } +*/ + + + /**/ + + /*if (u.max() > 0.0) + this->stopTime *=(double) this->gridCols;*/ + + + double time = 0.0; + double currentTau = this->tau0; + double finalTime = this->stopTime;// + 3.0*(u.max() - u.min()); + if( time + currentTau > finalTime ) currentTau = finalTime - time; + + double maxResidue( 1.0 ); + //double lastResidue( 10000.0 ); + tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); + tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity); + while( time < finalTime /*|| maxResidue > subMesh.template getSpaceStepsProducts< 1, 0 >()*/) + { + /**** + * Compute the RHS + */ + + for( int i = 0; i < fu.getSize(); i ++ ) + { + Entity.setCoordinates(tnlStaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x())); + Entity.refresh(); + neighbourEntities.refresh(subMesh,Entity.getIndex()); + fu[ i ] = schemeHost.getValue( this->subMesh, i, tnlStaticVector<2,int>(i % subMesh.getDimensions().x(),i / subMesh.getDimensions().x()), u, time, boundaryCondition,neighbourEntities); + } + maxResidue = fu. absMax(); + + + if( this -> cflCondition * maxResidue != 0.0) + currentTau = this -> cflCondition / maxResidue; + + /* if (maxResidue < 0.05) + cout << "Max < 0.05" << endl;*/ + if(currentTau > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) + { + //cout << currentTau << " >= " << 2.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >() << endl; + currentTau = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); + } + /*if(maxResidue > lastResidue) + currentTau *=(1.0/10.0);*/ + + + if( time + currentTau > finalTime ) currentTau = finalTime - time; +// for( int i = 0; i < fu.getSize(); i ++ ) +// { +// //cout << "Too big RHS! i = " << i << ", fu = " << fu[i] << ", u = " << u[i] << endl; +// if((u[i]+currentTau * fu[ i ])*u[i] < 0.0 && fu[i] != 0.0 && u[i] != 0.0 ) +// currentTau = fabs(u[i]/(2.0*fu[i])); +// +// } + + + for( int i = 0; i < fu.getSize(); i ++ ) + { + double add = u[i] + currentTau * fu[ i ]; + //if( fabs(u[i]) < fabs(add) or (this->subgridValues[subGridID] == this->currentStep +4) ) + u[ i ] = add; + } + time += currentTau; + + //cout << '\r' << flush; + //cout << maxResidue << " " << currentTau << " @ " << time << flush; + //lastResidue = maxResidue; + } + //cout << "Time: " << time << ", Res: " << maxResidue <<endl; + /*if (u.max() > 0.0) + this->stopTime /=(double) this->gridCols;*/ + + VectorType solution; + solution.setLike(u); + for( int i = 0; i < u.getSize(); i ++ ) + { + solution[i]=u[i]; + } + return solution; +} + + +#ifdef HAVE_CUDA + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) +{ + //int j = threadIdx.x + threadIdx.y * blockDim.x; + int th = (blockIdx.y) * caller->n*caller->n*caller->gridCols + + (blockIdx.x) * caller->n + + threadIdx.y * caller->n*caller->gridCols + + threadIdx.x; + //printf("i= %d,j= %d,th= %d\n",i,j,th); + *a = caller->work_u_cuda[th]; + //printf("Hi %f \n", *a); + //return ret; +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::updateSubgridCUDA2D( const int i ,tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller, double* a) +{ +// int j = threadIdx.x + threadIdx.y * blockDim.x; + int index = (blockIdx.y) * caller->n*caller->n*caller->gridCols + + (blockIdx.x) * caller->n + + threadIdx.y * caller->n*caller->gridCols + + threadIdx.x; + + if( (fabs(caller->work_u_cuda[index]) > fabs(*a)) || (caller->unusedCell_cuda[index] == 1) ) + { + caller->work_u_cuda[index] = *a; + caller->unusedCell_cuda[index] = 0; + + } + + *a = caller->work_u_cuda[index]; +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::insertSubgridCUDA2D( double u, const int i ) +{ + + +// int j = threadIdx.x + threadIdx.y * blockDim.x; + //printf("j = %d, u = %f\n", j,u); + + int index = (blockIdx.y)*this->n*this->n*this->gridCols + + (blockIdx.x)*this->n + + threadIdx.y*this->n*this->gridCols + + threadIdx.x; + + //printf("i= %d,j= %d,index= %d\n",i,j,index); + if( (fabs(this->work_u_cuda[index]) > fabs(u)) || (this->unusedCell_cuda[index] == 1) ) + { + this->work_u_cuda[index] = u; + this->unusedCell_cuda[index] = 0; + + } + + +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID) +{ + + __shared__ int tmp; + __shared__ double value; + //double tmpRes = 0.0; + volatile double* sharedTau = &u[blockDim.x*blockDim.y]; + double* map_local = &u[2*blockDim.x*blockDim.y]; + + int i = threadIdx.x; + int j = threadIdx.y; + int l = threadIdx.y * blockDim.x + threadIdx.x; + int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; + + /* LOAD MAP */ + int index = (blockIdx.y) * this->n*this->n*this->gridCols + + (blockIdx.x) * this->n + + threadIdx.y * this->n*this->gridCols + + threadIdx.x; + map_local[l]=this->map_stretched_cuda[index]; + /* LOADED */ + + bool computeFU = !((i == 0 && (boundaryCondition & 4)) or + (i == blockDim.x - 1 && (boundaryCondition & 2)) or + (j == 0 && (boundaryCondition & 8)) or + (j == blockDim.y - 1 && (boundaryCondition & 1))); + + if(l == 0) + { + tmp = 0; + int centreGID = (blockDim.y*blockIdx.y + (blockDim.y>>1))*(blockDim.x*gridDim.x) + blockDim.x*blockIdx.x + (blockDim.x>>1); + if(this->unusedCell_cuda[centreGID] == 0 || boundaryCondition == 0) + tmp = 1; + } + __syncthreads(); + + /*if(!tmp && (u[0]*u[l] <= 0.0)) + atomicMax( &tmp, 1);*/ + + __syncthreads(); + if(tmp !=1) + { +// if(computeFU) +// absVal[l]=0.0; +// else +// absVal[l] = fabs(u[l]); +// +// __syncthreads(); +// +// if((blockDim.x == 16) && (l < 128)) absVal[l] = Max(absVal[l],absVal[l+128]); +// __syncthreads(); +// if((blockDim.x == 16) && (l < 64)) absVal[l] = Max(absVal[l],absVal[l+64]); +// __syncthreads(); +// if(l < 32) absVal[l] = Max(absVal[l],absVal[l+32]); +// if(l < 16) absVal[l] = Max(absVal[l],absVal[l+16]); +// if(l < 8) absVal[l] = Max(absVal[l],absVal[l+8]); +// if(l < 4) absVal[l] = Max(absVal[l],absVal[l+4]); +// if(l < 2) absVal[l] = Max(absVal[l],absVal[l+2]); +// if(l < 1) value = Sign(u[0])*Max(absVal[l],absVal[l+1]); +// __syncthreads(); +// +// if(computeFU) +// u[l] = value; + if(computeFU) + { + if(boundaryCondition == 4) + u[l] = u[threadIdx.y * blockDim.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.x+this->n); + else if(boundaryCondition == 2) + u[l] = u[threadIdx.y * blockDim.x + blockDim.x - 1] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.x);//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.x - threadIdx.x - 1+this->n); + else if(boundaryCondition == 8) + u[l] = u[threadIdx.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y) ;//+ 2*Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(threadIdx.y+this->n); + else if(boundaryCondition == 1) + u[l] = u[(blockDim.y - 1)* blockDim.x + threadIdx.x] + Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(this->n - 1 - threadIdx.y) ;//+ Sign(u[0])*this->subMesh.template getSpaceStepsProducts< 1, 0 >()*(blockDim.y - threadIdx.y - 1 +this->n); + } + } + + double time = 0.0; + __shared__ double currentTau; + double cfl = this->cflCondition; + double fu = 0.0; +// if(threadIdx.x * threadIdx.y == 0) +// { +// currentTau = finalTime; +// } + double finalTime = this->stopTime; + __syncthreads(); +// if( time + currentTau > finalTime ) currentTau = finalTime - time; + + tnlGridEntity<MeshType, 2, tnlGridEntityNoStencilStorage > Entity(subMesh); + tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities(Entity); + Entity.setCoordinates(tnlStaticVector<2,int>(i,j)); + Entity.refresh(); + neighbourEntities.refresh(subMesh,Entity.getIndex()); + + + if(map_local[l] == 0.0) + { + u[l] = /*Sign(u[l])**/MAP_SOLVER_MAX_VALUE; + computeFU = false; + } + __syncthreads(); + + + while( time < finalTime ) + { + sharedTau[l] = finalTime; + + if(computeFU) + { + fu = schemeHost.getValueDev( this->subMesh, l, tnlStaticVector<2,int>(i,j)/*this->subMesh.getCellCoordinates(l)*/, u, time, boundaryCondition, neighbourEntities, map_local); + sharedTau[l]=abs(cfl/fu); + } + + + + if(l == 0) + { + if(sharedTau[0] > 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >()) sharedTau[0] = 1.0 * this->subMesh.template getSpaceStepsProducts< 1, 0 >(); + } + else if(l == blockDim.x*blockDim.y - 1) + if( time + sharedTau[l] > finalTime ) sharedTau[l] = finalTime - time; + + +// if( (Sign(u[l]+sharedTau[l]*fu) != Sign(u[l])) && fu != 0.0 && fu != -0.0) +// { +// printf("orig: %10f", sharedTau[l]); +// sharedTau[l]=abs(u[l]/(1.1*fu)) ; +// printf(" new: %10f\n", sharedTau[l]); +// } + + + + if((blockDim.x == 16) && (l < 128)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+128]); + __syncthreads(); + if((blockDim.x == 16) && (l < 64)) sharedTau[l] = Min(sharedTau[l],sharedTau[l+64]); + __syncthreads(); + if(l < 32) sharedTau[l] = Min(sharedTau[l],sharedTau[l+32]); + if(l < 16) sharedTau[l] = Min(sharedTau[l],sharedTau[l+16]); + if(l < 8) sharedTau[l] = Min(sharedTau[l],sharedTau[l+8]); + if(l < 4) sharedTau[l] = Min(sharedTau[l],sharedTau[l+4]); + if(l < 2) sharedTau[l] = Min(sharedTau[l],sharedTau[l+2]); + if(l < 1) currentTau = Min(sharedTau[l],sharedTau[l+1]); + __syncthreads(); + + if(computeFU) + u[l] += currentTau * fu; +// if(abs(u[l]) > MAP_SOLVER_MAX_VALUE) +// { +// u[l] = /*Sign(u[l])**/MAP_SOLVER_MAX_VALUE; +// computeFU = false; +// } + time += currentTau; + } + + +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getOwnerCUDA2D(int i) const +{ + + return ((i / (this->gridCols*this->n*this->n))*this->gridCols + + (i % (this->gridCols*this->n))/this->n); +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getSubgridValueCUDA2D( int i ) const +{ + return this->subgridValues_cuda[i]; +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setSubgridValueCUDA2D(int i, int value) +{ + this->subgridValues_cuda[i] = value; +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +int tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::getBoundaryConditionCUDA2D( int i ) const +{ + return this->boundaryConditions_cuda[i]; +} + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__device__ +void tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::setBoundaryConditionCUDA2D(int i, int value) +{ + this->boundaryConditions_cuda[i] = value; +} + + + +//north - 1, east - 2, west - 4, south - 8 + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ +void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/synchronizeCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) //needs fix ---- maybe not anymore --- but frankly: yeah, it does -- aaaa-and maybe fixed now +{ + + __shared__ int boundary[4]; // north,east,west,south + __shared__ int subgridValue; + __shared__ int newSubgridValue; + + + int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; + double u = cudaSolver->work_u_cuda[gid]; + double u_cmp; + int subgridValue_cmp=INT_MAX; + int boundary_index=0; + + + if(threadIdx.x+threadIdx.y == 0) + { + subgridValue = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x); + boundary[0] = 0; + boundary[1] = 0; + boundary[2] = 0; + boundary[3] = 0; + newSubgridValue = 0; + //printf("%d %d\n", blockDim.x, gridDim.x); + } + __syncthreads(); + + + + if( (threadIdx.x == 0 /* && !(cudaSolver->currentStep & 1)*/) || + (threadIdx.y == 0 /* && (cudaSolver->currentStep & 1)*/) || + (threadIdx.x == blockDim.x - 1 /* && !(cudaSolver->currentStep & 1)*/) || + (threadIdx.y == blockDim.y - 1 /* && (cudaSolver->currentStep & 1)*/) ) + { + if(threadIdx.x == 0 && (blockIdx.x != 0)/* && !(cudaSolver->currentStep & 1)*/) + { + u_cmp = cudaSolver->work_u_cuda[gid - 1]; + subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x - 1); + boundary_index = 2; + } + + if(threadIdx.x == blockDim.x - 1 && (blockIdx.x != gridDim.x - 1)/* && !(cudaSolver->currentStep & 1)*/) + { + u_cmp = cudaSolver->work_u_cuda[gid + 1]; + subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x + 1); + boundary_index = 1; + } + + __threadfence(); + if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) + { + cudaSolver->unusedCell_cuda[gid] = 0; + atomicMax(&newSubgridValue, INT_MAX); + atomicMax(&boundary[boundary_index], 1); + cudaSolver->work_u_cuda[gid] = u_cmp; + u=u_cmp; + } + __threadfence(); + if(threadIdx.y == 0 && (blockIdx.y != 0)/* && (cudaSolver->currentStep & 1)*/) + { + u_cmp = cudaSolver->work_u_cuda[gid - blockDim.x*gridDim.x]; + subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y - 1)*gridDim.x + blockIdx.x); + boundary_index = 3; + } + if(threadIdx.y == blockDim.y - 1 && (blockIdx.y != gridDim.y - 1)/* && (cudaSolver->currentStep & 1)*/) + { + u_cmp = cudaSolver->work_u_cuda[gid + blockDim.x*gridDim.x]; + subgridValue_cmp = cudaSolver->getSubgridValueCUDA2D((blockIdx.y + 1)*gridDim.x + blockIdx.x); + boundary_index = 0; + } + +// __threadfence(); + if((subgridValue == INT_MAX || fabs(u_cmp) + cudaSolver->delta < fabs(u) ) && (subgridValue_cmp != INT_MAX && subgridValue_cmp != -INT_MAX)) + { + cudaSolver->unusedCell_cuda[gid] = 0; + atomicMax(&newSubgridValue, INT_MAX); + atomicMax(&boundary[boundary_index], 1); + cudaSolver->work_u_cuda[gid] = u_cmp; + } + } + __threadfence(); + __syncthreads(); + + if(threadIdx.x+threadIdx.y == 0) + { + if(subgridValue == INT_MAX && newSubgridValue !=0) + cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, -INT_MAX); + + cudaSolver->setBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, boundary[0] + + 2 * boundary[1] + + 4 * boundary[2] + + 8 * boundary[3]); + + + if(blockIdx.x+blockIdx.y ==0) + { + cudaSolver->currentStep = cudaSolver->currentStep + 1; + *(cudaSolver->runcuda) = 0; + } +// +// int stepValue = cudaSolver->currentStep + 4; +// if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) +// cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); +// +// atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); + } + + + /* + //printf("I am not an empty kernel!\n"); + //cout << "Synchronizig..." << endl; + int tmp1, tmp2; + int grid1, grid2; + + if(cudaSolver->currentStep & 1) + { + //printf("I am not an empty kernel! 1\n"); + for(int j = 0; j < cudaSolver->gridRows - 1; j++) + { + //printf("I am not an empty kernel! 3\n"); + for (int i = 0; i < cudaSolver->gridCols*cudaSolver->n; i++) + { + tmp1 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n-1)+j*cudaSolver->n) + i; + tmp2 = cudaSolver->gridCols*cudaSolver->n*((cudaSolver->n)+j*cudaSolver->n) + i; + grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); + grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); + + if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) + { + //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); + cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; + cudaSolver->unusedCell_cuda[tmp2] = 0; + if(grid2 == INT_MAX) + { + cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); + } + if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 8) ) + cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+8); + } + else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) + { + //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); + cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; + cudaSolver->unusedCell_cuda[tmp1] = 0; + if(grid1 == INT_MAX) + { + cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); + } + if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 1) ) + cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+1); + } + } + } + + } + else + { + //printf("I am not an empty kernel! 2\n"); + for(int i = 1; i < cudaSolver->gridCols; i++) + { + //printf("I am not an empty kernel! 4\n"); + for (int j = 0; j < cudaSolver->gridRows*cudaSolver->n; j++) + { + + tmp1 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n - 1; + tmp2 = cudaSolver->gridCols*cudaSolver->n*j + i*cudaSolver->n ; + grid1 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)); + grid2 = cudaSolver->getSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)); + + if ((fabs(cudaSolver->work_u_cuda[tmp1]) < fabs(cudaSolver->work_u_cuda[tmp2]) - cudaSolver->delta || grid2 == INT_MAX || grid2 == -INT_MAX) && (grid1 != INT_MAX && grid1 != -INT_MAX)) + { + //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); + cudaSolver->work_u_cuda[tmp2] = cudaSolver->work_u_cuda[tmp1]; + cudaSolver->unusedCell_cuda[tmp2] = 0; + if(grid2 == INT_MAX) + { + cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), -INT_MAX); + } + if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2)) & 4) ) + cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp2))+4); + } + else if ((fabs(cudaSolver->work_u_cuda[tmp1]) > fabs(cudaSolver->work_u_cuda[tmp2]) + cudaSolver->delta || grid1 == INT_MAX || grid1 == -INT_MAX) && (grid2 != INT_MAX && grid2 != -INT_MAX)) + { + //printf("%d %d %d %d \n",tmp1,tmp2,cudaSolver->getOwnerCUDA2D(tmp1),cudaSolver->getOwnerCUDA2D(tmp2)); + cudaSolver->work_u_cuda[tmp1] = cudaSolver->work_u_cuda[tmp2]; + cudaSolver->unusedCell_cuda[tmp1] = 0; + if(grid1 == INT_MAX) + { + cudaSolver->setSubgridValueCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), -INT_MAX); + } + if(! (cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1)) & 2) ) + cudaSolver->setBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1), cudaSolver->getBoundaryConditionCUDA2D(cudaSolver->getOwnerCUDA2D(tmp1))+2); + } + } + } + } + //printf("I am not an empty kernel! 5 cudaSolver->currentStep : %d \n", cudaSolver->currentStep); + + cudaSolver->currentStep = cudaSolver->currentStep + 1; + int stepValue = cudaSolver->currentStep + 4; + for (int i = 0; i < cudaSolver->gridRows * cudaSolver->gridCols; i++) + { + if( cudaSolver->getSubgridValueCUDA2D(i) == -INT_MAX ) + cudaSolver->setSubgridValueCUDA2D(i, stepValue); + } + + int maxi = 0; + for(int q=0; q < cudaSolver->gridRows*cudaSolver->gridCols;q++) + { + //printf("%d : %d\n", q, cudaSolver->boundaryConditions_cuda[q]); + maxi=Max(maxi,cudaSolver->getBoundaryConditionCUDA2D(q)); + } + //printf("I am not an empty kernel! %d\n", maxi); + *(cudaSolver->runcuda) = (maxi > 0); + //printf("I am not an empty kernel! 7 %d\n", cudaSolver->boundaryConditions_cuda[0]); + //cout << "Grid synchronized at step " << (this->currentStep - 1 ) << endl; +*/ +} + + + +template <typename SchemeHost, typename SchemeDevice, typename Device> +__global__ +void synchronize2CUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver) +{ +// if(blockIdx.x+blockIdx.y ==0) +// { +// cudaSolver->currentStep = cudaSolver->currentStep + 1; +// *(cudaSolver->runcuda) = 0; +// } + + int stepValue = cudaSolver->currentStep + 4; + if( cudaSolver->getSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x) == -INT_MAX ) + cudaSolver->setSubgridValueCUDA2D(blockIdx.y*gridDim.x + blockIdx.x, stepValue); + + atomicMax((cudaSolver->runcuda),cudaSolver->getBoundaryConditionCUDA2D(blockIdx.y*gridDim.x + blockIdx.x)); +} + + + + + + + + +template< typename SchemeHost, typename SchemeDevice, typename Device> +__global__ +void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA2D( tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , int* ptr2, int* ptr3, double* tmp_map_ptr) +{ + //cout << "Initializating solver..." << endl; + //const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh"); + //this->mesh_cuda.load( meshLocation ); + + //this->n_cuda = parameters.getParameter <int>("subgrid-size"); + //cout << "Setting N << this->n_cuda << endl; + + //this->subMesh_cuda.setDimensions( this->n_cuda, this->n_cuda ); + //this->subMesh_cuda.setDomain( tnlStaticVector<2,double>(0.0, 0.0), + //tnlStaticVector<2,double>(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*(double)(this->n_cuda), this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()*(double)(this->n_cuda)) ); + + //this->subMesh_cuda.save("submesh.tnl"); + +// const tnlString& initialCondition = parameters.getParameter <tnlString>("initial-condition"); +// this->u0.load( initialCondition ); + + //cout << this->mesh.getCellCenter(0) << endl; + + //this->delta_cuda = parameters.getParameter <double>("delta"); + //this->delta_cuda *= this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >(); + + //cout << "Setting delta to " << this->delta << endl; + + //this->tau0_cuda = parameters.getParameter <double>("initial-tau"); + //cout << "Setting initial tau to " << this->tau0_cuda << endl; + //this->stopTime_cuda = parameters.getParameter <double>("stop-time"); + + //this->cflCondition_cuda = parameters.getParameter <double>("cfl-condition"); + //this -> cflCondition_cuda *= sqrt(this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >()*this->mesh_cuda.template getSpaceStepsProducts< 0, 1 >()); + //cout << "Setting CFL to " << this->cflCondition << endl; +//// +//// + +// this->gridRows_cuda = gridRows; +// this->gridCols_cuda = gridCols; + + cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double)); + cudaSolver->map_stretched_cuda = tmp_map_ptr; + cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int)); + cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); + cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int)); + cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool)); + *(cudaSolver->runcuda) = 1; + cudaSolver->currentStep = 1; + //cudaMemcpy(ptr,&(cudaSolver->work_u_cuda), sizeof(double*),cudaMemcpyDeviceToHost); + //ptr = cudaSolver->work_u_cuda; + printf("GPU memory allocated.\n"); + + for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++) + { + cudaSolver->subgridValues_cuda[i] = INT_MAX; + cudaSolver->boundaryConditions_cuda[i] = 0; + } + + /*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++) + { + printf("%d\n",j); + cudaSolver->unusedCell_cuda[ j] = 1; + }*/ + printf("GPU memory initialized.\n"); + + + //cudaSolver->work_u_cuda[50] = 32.153438; +//// +//// + //stretchGrid(); + //this->stopTime_cuda /= (double)(this->gridCols_cuda); + //this->stopTime_cuda *= (1.0+1.0/((double)(this->n_cuda) - 1.0)); + //cout << "Setting stopping time to " << this->stopTime << endl; + //this->stopTime_cuda = 1.5*((double)(this->n_cuda))*parameters.getParameter <double>("stop-time")*this->mesh_cuda.template getSpaceStepsProducts< 1, 0 >(); + //cout << "Setting stopping time to " << this->stopTime << endl; + + //cout << "Initializating scheme..." << endl; + //if(!this->schemeDevice.init(parameters)) +// { + //cerr << "Scheme failed to initialize." << endl; +// return false; +// } + //cout << "Scheme initialized." << endl; + + //test(); + +// this->currentStep_cuda = 1; + //return true; +} + + + + +//extern __shared__ double array[]; +template< typename SchemeHost, typename SchemeDevice, typename Device > +__global__ +void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/initRunCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) + +{ + + + extern __shared__ double u[]; + //printf("%p\n",caller->work_u_cuda); + + int i = blockIdx.y * gridDim.x + blockIdx.x; + int l = threadIdx.y * blockDim.x + threadIdx.x; + + __shared__ int containsCurve; + if(l == 0) + containsCurve = 0; + + //double a; + caller->getSubgridCUDA2D(i,caller, &u[l]); + //printf("%f %f\n",a , u[l]); + //u[l] = a; + //printf("Hi %f \n", u[l]); + __syncthreads(); + //printf("hurewrwr %f \n", u[l]); + if(u[0] * u[l] <= 0.0) + { + //printf("contains %d \n",i); + atomicMax( &containsCurve, 1); + } + + __syncthreads(); + //printf("hu"); + //printf("%d : %f\n", l, u[l]); + if(containsCurve == 1) + { + //printf("have curve \n"); + caller->runSubgridCUDA2D(0,u,i); + //printf("%d : %f\n", l, u[l]); + __syncthreads(); + caller->insertSubgridCUDA2D(u[l],i); + __syncthreads(); + if(l == 0) + caller->setSubgridValueCUDA2D(i, 4); + } + + +} + + + + + +template< typename SchemeHost, typename SchemeDevice, typename Device > +__global__ +void /*tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int>::*/runCUDA2D(tnlParallelMapSolver<2,SchemeHost, SchemeDevice, Device, double, int >* caller) +{ + extern __shared__ double u[]; + int i = blockIdx.y * gridDim.x + blockIdx.x; + int l = threadIdx.y * blockDim.x + threadIdx.x; + int bound = caller->getBoundaryConditionCUDA2D(i); + + if(caller->getSubgridValueCUDA2D(i) != INT_MAX && bound != 0 && caller->getSubgridValueCUDA2D(i) > 0) + { + caller->getSubgridCUDA2D(i,caller, &u[l]); + + //if(l == 0) + //printf("i = %d, bound = %d\n",i,caller->getSubgridValueCUDA2D(i)); + if(caller->getSubgridValueCUDA2D(i) == caller->currentStep+4) + { + if(bound & 1) + { + caller->runSubgridCUDA2D(1,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if(bound & 2 ) + { + caller->runSubgridCUDA2D(2,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if(bound & 4) + { + caller->runSubgridCUDA2D(4,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if(bound & 8) + { + caller->runSubgridCUDA2D(8,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + + + + + + if( ((bound & 3 ))) + { + caller->runSubgridCUDA2D(3,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( ((bound & 5 ))) + { + caller->runSubgridCUDA2D(5,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( ((bound & 10 ))) + { + caller->runSubgridCUDA2D(10,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( (bound & 12 )) + { + caller->runSubgridCUDA2D(12,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + + + + + + } + + + else + { + + + + + + + + + + if( ((bound == 2))) + { + caller->runSubgridCUDA2D(2,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( ((bound == 1) )) + { + caller->runSubgridCUDA2D(1,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( ((bound == 8) )) + { + caller->runSubgridCUDA2D(8,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( (bound == 4)) + { + caller->runSubgridCUDA2D(4,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + + + + + + + + + + + if( ((bound & 3) )) + { + caller->runSubgridCUDA2D(3,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( ((bound & 5) )) + { + caller->runSubgridCUDA2D(5,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( ((bound & 10) )) + { + caller->runSubgridCUDA2D(10,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + if( (bound & 12) ) + { + caller->runSubgridCUDA2D(12,u,i); + //__syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + } + + + + + + + + + + + + + } + /*if( bound ) + { + caller->runSubgridCUDA2D(15,u,i); + __syncthreads(); + //caller->insertSubgridCUDA2D(u[l],i); + //__syncthreads(); + //caller->getSubgridCUDA2D(i,caller, &u[l]); + caller->updateSubgridCUDA2D(i,caller, &u[l]); + __syncthreads(); + }*/ + + if(l==0) + { + caller->setBoundaryConditionCUDA2D(i, 0); + caller->setSubgridValueCUDA2D(i, caller->getSubgridValueCUDA2D(i) - 1 ); + } + + + } + + + +} + +#endif /*HAVE_CUDA*/ + +#endif /* TNLPARALLELMAPSOLVER2D_IMPL_H_ */ diff --git a/examples/hamilton-jacobi-parallel/CMakeLists.txt b/examples/hamilton-jacobi-parallel/CMakeLists.txt index 3911e6108b490c4d6fe52f174b3151e986a18e73..b65f3e6a4c8d0cc22a5e10dc46f298fcc5e0dfc0 100755 --- a/examples/hamilton-jacobi-parallel/CMakeLists.txt +++ b/examples/hamilton-jacobi-parallel/CMakeLists.txt @@ -1,6 +1,7 @@ set( tnl_hamilton_jacobi_parallel_SOURCES # MainBuildConfig.h -# tnlParallelEikonalSolver_impl.h +# tnlParallelEikonalSolver2D_impl.h +# tnlParallelEikonalSolver3D_impl.h # tnlParallelEikonalSolver.h # parallelEikonalConfig.h main.cpp) diff --git a/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h b/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h index b79341e909422b0b2ac67abb7ad0080a52ad2441..bcc4c1fe8040b9a5b5027149af4a74de6746b150 100644 --- a/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h +++ b/examples/hamilton-jacobi-parallel/parallelEikonalConfig.h @@ -1,5 +1,5 @@ /*************************************************************************** - hamiltonJacobiProblemConfig.h - description + parallelEikonalConfig.h - description ------------------- begin : Oct 5, 2014 copyright : (C) 2014 by Tomas Sobotik @@ -15,8 +15,8 @@ * * ***************************************************************************/ -#ifndef HAMILTONJACOBIPROBLEMCONFIG_H_ -#define HAMILTONJACOBIPROBLEMCONFIG_H_ +#ifndef HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ +#define HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ #include <config/tnlConfigDescription.h> @@ -43,4 +43,4 @@ class parallelEikonalConfig } }; -#endif /* HAMILTONJACOBIPROBLEMCONFIG_H_ */ +#endif /* HAMILTONJACOBIPARALLELEIKONALPROBLEMCONFIG_H_ */ diff --git a/src/operators/godunov-eikonal/CMakeLists.txt b/src/operators/godunov-eikonal/CMakeLists.txt index c24fc262f85f7a8c9a9fdfb0add69564cf2ca00a..8b74f755ed0a74e7001f1c155c60f61c650679d2 100755 --- a/src/operators/godunov-eikonal/CMakeLists.txt +++ b/src/operators/godunov-eikonal/CMakeLists.txt @@ -1,11 +1,13 @@ set( headers godunovEikonal.h parallelGodunovEikonal.h + parallelGodunovMap.h godunovEikonal1D_impl.h godunovEikonal2D_impl.h godunovEikonal3D_impl.h parallelGodunovEikonal1D_impl.h parallelGodunovEikonal2D_impl.h - parallelGodunovEikonal3D_impl.h) + parallelGodunovEikonal3D_impl.h + parallelGodunovMap2D_impl.h) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/operators/godunov-eikonal ) diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal.h b/src/operators/godunov-eikonal/parallelGodunovEikonal.h index 6954c3008c8a43ff85efd6ef806b664b13ba735c..5e0affb345ec52ebc63f17a52969a06269cf31d2 100644 --- a/src/operators/godunov-eikonal/parallelGodunovEikonal.h +++ b/src/operators/godunov-eikonal/parallelGodunovEikonal.h @@ -1,5 +1,5 @@ /*************************************************************************** - godunov.h - description + parallelGodunovEikonal.h - description ------------------- begin : Dec 1 , 2014 copyright : (C) 2014 by Tomas Sobotik @@ -14,8 +14,8 @@ * * ***************************************************************************/ -#ifndef GODUNOVEIKONAL_H_ -#define GODUNOVEIKONAL_H_ +#ifndef PARALLELGODUNOVEIKONAL_H_ +#define PARALLELGODUNOVEIKONAL_H_ #include <matrices/tnlCSRMatrix.h> #include <solvers/preconditioners/tnlDummyPreconditioner.h> @@ -266,4 +266,4 @@ protected: #include <operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h> -#endif /* GODUNOVEIKONAL_H_ */ +#endif /* PARALLELGODUNOVEIKONAL_H_ */ diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h index cb0670518470254a201d9d025b2fa2b271d3f76c..cf12f544d7dafee24cfc992c3902038d03b14c80 100644 --- a/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h +++ b/src/operators/godunov-eikonal/parallelGodunovEikonal1D_impl.h @@ -14,8 +14,8 @@ * * ***************************************************************************/ -#ifndef GODUNOVEIKONAL1D_IMPL_H_ -#define GODUNOVEIKONAL1D_IMPL_H_ +#ifndef PARALLELGODUNOVEIKONAL1D_IMPL_H_ +#define PARALLELGODUNOVEIKONAL1D_IMPL_H_ @@ -167,4 +167,4 @@ Real parallelGodunovEikonalScheme< tnlGrid< 1, MeshReal, Device, MeshIndex >, Re } -#endif /* GODUNOVEIKONAL1D_IMPL_H_ */ +#endif /* PARALLELGODUNOVEIKONAL1D_IMPL_H_ */ diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h index 5ac0dd0d9b05ab21ade02f4526c9b65f01337f5c..7a762a09a9ce7c428c2d429f4db10ac7f70d30fb 100644 --- a/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h +++ b/src/operators/godunov-eikonal/parallelGodunovEikonal2D_impl.h @@ -14,8 +14,8 @@ * * ***************************************************************************/ -#ifndef GODUNOVEIKONAL2D_IMPL_H_ -#define GODUNOVEIKONAL2D_IMPL_H_ +#ifndef PARALLELGODUNOVEIKONAL2D_IMPL_H_ +#define PARALLELGODUNOVEIKONAL2D_IMPL_H_ template< typename MeshReal, @@ -481,4 +481,4 @@ Real parallelGodunovEikonalScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Re } -#endif /* GODUNOVEIKONAL2D_IMPL_H_ */ +#endif /* PARALLELGODUNOVEIKONAL2D_IMPL_H_ */ diff --git a/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h b/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h index 2ab0f2c42eae9fe7100ea4cd76f6446dcd4362cf..7206cc9a40d69f1c70441238dddd42e106ef5949 100644 --- a/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h +++ b/src/operators/godunov-eikonal/parallelGodunovEikonal3D_impl.h @@ -14,8 +14,8 @@ * * ***************************************************************************/ -#ifndef GODUNOVEIKONAL3D_IMPL_H_ -#define GODUNOVEIKONAL3D_IMPL_H_ +#ifndef PARALLELGODUNOVEIKONAL3D_IMPL_H_ +#define PARALLELGODUNOVEIKONAL3D_IMPL_H_ template< typename MeshReal, @@ -384,4 +384,4 @@ Real parallelGodunovEikonalScheme< tnlGrid< 3, MeshReal, Device, MeshIndex >, Re } -#endif /* GODUNOVEIKONAL3D_IMPL_H_ */ +#endif /* PARALLELGODUNOVEIKONAL3D_IMPL_H_ */ diff --git a/src/operators/godunov-eikonal/parallelGodunovMap.h b/src/operators/godunov-eikonal/parallelGodunovMap.h new file mode 100644 index 0000000000000000000000000000000000000000..19a764d9ba2535985549a08484c84c6d09084201 --- /dev/null +++ b/src/operators/godunov-eikonal/parallelGodunovMap.h @@ -0,0 +1,270 @@ +/*************************************************************************** + parallelGodunovMap.h - description + ------------------- + begin : Dec 1 , 2014 + copyright : (C) 2014 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef PARALLELGODUNOVMAP_H_ +#define PARALLELGODUNOVMAP_H_ + +#include <matrices/tnlCSRMatrix.h> +#include <solvers/preconditioners/tnlDummyPreconditioner.h> +#include <solvers/tnlSolverMonitor.h> +#include <core/tnlLogger.h> +#include <core/vectors/tnlVector.h> +#include <core/vectors/tnlSharedVector.h> +#include <core/mfilename.h> +#include <mesh/tnlGrid.h> +#include <core/tnlCuda.h> +#include <mesh/grids/tnlGridEntity.h> + + +template< typename Mesh, + typename Real, + typename Index > +class parallelGodunovMapScheme +{ +}; + + + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +class parallelGodunovMapScheme< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index > +{ + +public: + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef tnlGrid< 1, Real, Device, Index > MeshType; + typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; + typedef typename MeshType::CoordinatesType CoordinatesType; + + +#ifdef HAVE_CUDA + __device__ __host__ +#endif + static tnlString getType(); +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType positivePart(const RealType arg) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType negativePart(const RealType arg) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType sign(const RealType x, const RealType eps) const; + + template< typename Vector > + #ifdef HAVE_CUDA + __device__ __host__ + #endif + Real getValue( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const Vector& u, + const RealType& time, + const IndexType boundaryCondition) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + bool init( const tnlParameterContainer& parameters ); + + RealType epsilon; + +protected: + + MeshType originalMesh; + + DofVectorType dofVector; + + RealType h; + + + + +}; + + + + + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +class parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > +{ + +public: + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef tnlGrid< 2, Real, Device, Index > MeshType; + typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; + typedef typename MeshType::CoordinatesType CoordinatesType; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + static tnlString getType(); +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType positivePart(const RealType arg) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType negativePart(const RealType arg) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType sign(const RealType x, const Real eps) const; + + template< typename Vector > + #ifdef HAVE_CUDA + __device__ __host__ + #endif + Real getValue( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const Vector& u, + const RealType& time, + const IndexType boundaryCondition, + const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities) const; + + #ifdef HAVE_CUDA + __device__ + #endif + Real getValueDev( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const RealType* u, + const RealType& time, + const IndexType boundaryCondition, + const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities, + const RealType* map) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + bool init( const tnlParameterContainer& parameters ); + + RealType epsilon; + +protected: + + MeshType originalMesh; + + DofVectorType dofVector; + + RealType hx, ihx; + RealType hy, ihy; + + + + +}; + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +class parallelGodunovMapScheme< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > +{ + +public: + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef tnlGrid< 3, Real, Device, Index > MeshType; + typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; + typedef typename MeshType::CoordinatesType CoordinatesType; + +#ifdef HAVE_CUDA + __device__ __host__ +#endif + static tnlString getType(); +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType positivePart(const RealType arg) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType negativePart(const RealType arg) const; +#ifdef HAVE_CUDA + __device__ __host__ +#endif + RealType sign(const RealType x, const Real eps) const; + + template< typename Vector > + #ifdef HAVE_CUDA + __device__ __host__ + #endif + Real getValue( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const Vector& u, + const RealType& time, + const IndexType boundaryCondition, + const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities ) const; + +#ifdef HAVE_CUDA + __device__ +#endif + Real getValueDev( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const RealType* u, + const RealType& time, + const IndexType boundaryCondition, + const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 3, tnlGridEntityNoStencilStorage >,3> neighbourEntities) const; + +#ifdef HAVE_CUDA + __device__ __host__ +#endif + bool init( const tnlParameterContainer& parameters ); + + RealType epsilon; + +protected: + + MeshType originalMesh; + + DofVectorType dofVector; + + RealType hx, ihx; + RealType hy, ihy; + RealType hz, ihz; + +}; + + + +//#include <operators/godunov-eikonal/parallelGodunovMap1D_impl.h> +#include <operators/godunov-eikonal/parallelGodunovMap2D_impl.h> +//#include <operators/godunov-eikonal/parallelGodunovMap3D_impl.h> + + +#endif /* PARALLELGODUNOVMAP_H_ */ diff --git a/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h b/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..3be04db3fde451ade9dae1dea33984dff52db93a --- /dev/null +++ b/src/operators/godunov-eikonal/parallelGodunovMap2D_impl.h @@ -0,0 +1,507 @@ +/*************************************************************************** + parallelGodunovMap2D_impl.h - description + ------------------- + begin : Dec 1 , 2014 + copyright : (C) 2014 by Tomas Sobotik + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef PARALLELGODUNOVMAP2D_IMPL_H_ +#define PARALLELGODUNOVMAP2D_IMPL_H_ + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif +Real parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index >:: positivePart(const Real arg) const +{ + if(arg > 0.0) + return arg; + return 0.0; +} + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif +Real parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: negativePart(const Real arg) const +{ + if(arg < 0.0) + return -arg; + return 0.0; +} + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif +Real parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: sign(const Real x, const Real eps) const +{ + if(x > eps) + return 1.0; + if (x < -eps) + return (-1.0); + + if ( x == 0.0) + return 0.0; + + return sin(/*(M_PI*x)/(2.0*eps) */(M_PI/2.0)*(x/eps)); +} + + + + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif +bool parallelGodunovMapScheme< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > :: init( const tnlParameterContainer& parameters ) +{ + + const tnlString& meshFile = parameters.getParameter< tnlString >( "mesh" ); + //MeshType originalMesh; + if( ! originalMesh.load( meshFile ) ) + { + //cerr << "I am not able to load the mesh from the file " << meshFile << "." << endl; + return false; + } + + + + + hx = originalMesh.template getSpaceStepsProducts< 1, 0 >(); + ihx = 1.0/hx; + hy = originalMesh.template getSpaceStepsProducts< 0, 1 >(); + ihy = 1.0/hy; + + this->epsilon = parameters. getParameter< double >( "epsilon" ); + + this->epsilon *=sqrt( hx*hx + hy*hy ); + +// dofVector. setSize( this->mesh.getDofs() ); + + return true; + +} + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif +tnlString parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index > :: getType() +{ + return tnlString( "parallelGodunovMapScheme< " ) + + MeshType::getType() + ", " + + ::getType< Real >() + ", " + + ::getType< Index >() + " >"; +} + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > +template< typename Vector > +#ifdef HAVE_CUDA +__device__ __host__ +#endif +Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: getValue( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const Vector& u, + const Real& time, + const IndexType boundaryCondition, + const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities ) const +{ + + + if ( ((coordinates.x() == 0 && (boundaryCondition & 4)) or + (coordinates.x() == mesh.getDimensions().x() - 1 && (boundaryCondition & 2)) or + (coordinates.y() == 0 && (boundaryCondition & 8)) or + (coordinates.y() == mesh.getDimensions().y() - 1 && (boundaryCondition & 1))) + /*and + !( (coordinates.y() == 0 or coordinates.y() == mesh.getDimensions().y() - 1) + and + ( coordinates.x() == 0 or coordinates.x() == mesh.getDimensions().x() - 1) + )*/ + ) + { + return 0.0; + } + + + //----------------------------------- + + RealType signui; + signui = sign(u[cellIndex],this->epsilon); + + +#ifdef HAVE_CUDA + //printf("%d : %d ;;;; %d : %d , %f \n",threadIdx.x, mesh.getDimensions().x() , threadIdx.y,mesh.getDimensions().y(), epsilon ); +#endif + + RealType xb = u[cellIndex]; + RealType xf = -u[cellIndex]; + RealType yb = u[cellIndex]; + RealType yf = -u[cellIndex]; + RealType a,b,c; + + + if(coordinates.x() == mesh.getDimensions().x() - 1) + xf += u[neighbourEntities.template getEntityIndex< -1, 0 >()]; + else + xf += u[neighbourEntities.template getEntityIndex< 1, 0 >()]; + + if(coordinates.x() == 0) + xb -= u[neighbourEntities.template getEntityIndex< 1, 0 >()]; + else + xb -= u[neighbourEntities.template getEntityIndex< -1, 0 >()]; + + if(coordinates.y() == mesh.getDimensions().y() - 1) + yf += u[neighbourEntities.template getEntityIndex< 0, -1 >()]; + else + yf += u[neighbourEntities.template getEntityIndex< 0, 1 >()]; + + if(coordinates.y() == 0) + yb -= u[neighbourEntities.template getEntityIndex< 0, 1 >()]; + else + yb -= u[neighbourEntities.template getEntityIndex< 0, -1 >()]; + + + //xb *= ihx; + //xf *= ihx; + // yb *= ihy; + //yf *= ihy; + + if(signui > 0.0) + { + xf = negativePart(xf); + + xb = positivePart(xb); + + yf = negativePart(yf); + + yb = positivePart(yb); + + } + else if(signui < 0.0) + { + + xb = negativePart(xb); + + xf = positivePart(xf); + + yb = negativePart(yb); + + yf = positivePart(yf); + } + + + if(xb - xf > 0.0) + a = xb; + else + a = xf; + + if(yb - yf > 0.0) + b = yb; + else + b = yf; + + c =(1.0 - sqrt(a*a+b*b)*ihx ); + + if(Sign(c) > 0.0 ) + return Sign(u[cellIndex])*c; + else + return signui*c; + + //-------------------------------------------------- + +// +// +// +// RealType acc = hx*hy*hx*hy; +// +// RealType nabla, xb, xf, yb, yf, signui; +// +// signui = sign(u[cellIndex],this->epsilon); +// +// +// //if(fabs(u[cellIndex]) < acc) return 0.0; +// +// if(signui > 0.0) +// { +// /**/ /* if(boundaryCondition & 2) +// xf = (u[mesh.getCellXSuccessor( cellIndex )] - u[cellIndex])/hx; +// else *//*if(boundaryCondition & 4) +// xf = 0.0; +// else /**/if(coordinates.x() == mesh.getDimensions().x() - 1) +// xf = negativePart((u[neighbourEntities.template getEntityIndex< -1, 0 >()] - u[cellIndex])*ihx); +// else +// xf = negativePart((u[neighbourEntities.template getEntityIndex< 1, 0 >()] - u[cellIndex])*ihx); +// +// /**/ /* if(boundaryCondition & 4) +// xb = (u[cellIndex] - u[mesh.getCellXPredecessor( cellIndex )])*ihx; +// else *//*if(boundaryCondition & 2) +// xb = 0.0; +// else /**/if(coordinates.x() == 0) +// xb = positivePart((u[cellIndex] - u[mesh.template getCellNextToCell<+1,0>( cellIndex )])*ihx); +// else +// xb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< -1, 0 >()])*ihx); +// +// /**/ /* if(boundaryCondition & 1) +// yf = (u[mesh.getCellYSuccessor( cellIndex )] - u[cellIndex])*ihy; +// else *//*if(boundaryCondition & 8) +// yf = 0.0; +// else /**/if(coordinates.y() == mesh.getDimensions().y() - 1) +// yf = negativePart((u[neighbourEntities.template getEntityIndex< 0, -1 >()] - u[cellIndex])*ihy); +// else +// yf = negativePart((u[neighbourEntities.template getEntityIndex< 0, 1 >()] - u[cellIndex])*ihy); +// +// /**/ /* if(boundaryCondition & 8) +// yb = (u[cellIndex] - u[mesh.getCellYPredecessor( cellIndex )])*ihy; +// else *//*if(boundaryCondition & 1) +// yb = 0.0; +// else /**/if(coordinates.y() == 0) +// yb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0, 1 >()])*ihy); +// else +// yb = positivePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0, -1 >()])*ihy); +// +// if(xb - xf > 0.0) +// xf = 0.0; +// else +// xb = 0.0; +// +// if(yb - yf > 0.0) +// yf = 0.0; +// else +// yb = 0.0; +// +// nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb ); +// if(fabs(1.0-nabla) < acc) +// return 0.0; +// return signui*(1.0 - nabla); +// } +// else if (signui < 0.0) +// { +// +// /**/ /* if(boundaryCondition & 2) +// xf = (u[mesh.getCellXSuccessor( cellIndex )] - u[cellIndex])*ihx; +// else*//* if(boundaryCondition & 4) +// xf = 0.0; +// else /**/if(coordinates.x() == mesh.getDimensions().x() - 1) +// xf = positivePart((u[neighbourEntities.template getEntityIndex< -1, 0 >()] - u[cellIndex])*ihx); +// else +// xf = positivePart((u[neighbourEntities.template getEntityIndex< 1, 0 >()] - u[cellIndex])*ihx); +// +// /**/ /* if(boundaryCondition & 4) +// xb = (u[cellIndex] - u[mesh.getCellXPredecessor( cellIndex )])*ihx; +// else*//* if(boundaryCondition & 2) +// xb = 0.0; +// else /**/if(coordinates.x() == 0) +// xb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 1, 0 >()])*ihx); +// else +// xb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< -1, 0 >()])*ihx); +// +// /**/ /* if(boundaryCondition & 1) +// yf = (u[mesh.getCellYSuccessor( cellIndex )] - u[cellIndex])*ihy; +// else *//*if(boundaryCondition & 8) +// yf = 0.0; +// else /**/if(coordinates.y() == mesh.getDimensions().y() - 1) +// yf = positivePart((u[neighbourEntities.template getEntityIndex< 0, -1 >()] - u[cellIndex])*ihy); +// else +// yf = positivePart((u[neighbourEntities.template getEntityIndex< 0, 1 >()] - u[cellIndex])*ihy); +// +// /**/ /* if(boundaryCondition & 8) +// yb = (u[cellIndex] - u[mesh.getCellYPredecessor( cellIndex )])*ihy; +// else*//* if(boundaryCondition & 1) +// yb = 0.0; +// else /**/if(coordinates.y() == 0) +// yb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0, 1 >()])*ihy); +// else +// yb = negativePart((u[cellIndex] - u[neighbourEntities.template getEntityIndex< 0, -1 >()])*ihy); +// +// +// if(xb - xf > 0.0) +// xf = 0.0; +// else +// xb = 0.0; +// +// if(yb - yf > 0.0) +// yf = 0.0; +// else +// yb = 0.0; +// +// nabla = sqrt (xf*xf + xb*xb + yf*yf + yb*yb ); +// +// if(fabs(1.0-nabla) < acc) +// return 0.0; +// return signui*(1.0 - nabla); +// } +// else +// { +// return 0.0; +// } + +} + + + + + + + + + +template< typename MeshReal, + typename Device, + typename MeshIndex, + typename Real, + typename Index > + +#ifdef HAVE_CUDA +__device__ +#endif +Real parallelGodunovMapScheme< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: getValueDev( const MeshType& mesh, + const IndexType cellIndex, + const CoordinatesType& coordinates, + const Real* u, + const Real& time, + const IndexType boundaryCondition, + const tnlNeighbourGridEntityGetter<tnlGridEntity< MeshType, 2, tnlGridEntityNoStencilStorage >,2> neighbourEntities, + const Real* map) const +{ +// int gid = (blockDim.y*blockIdx.y + threadIdx.y)*blockDim.x*gridDim.x + blockDim.x*blockIdx.x + threadIdx.x; + + RealType signui; +// if(boundaryCondition == 0) + signui = sign(u[cellIndex],/*(boundaryCondition != 0) * */this->epsilon); +// else +// signui = Sign(u[cellIndex]); + + RealType value; +// if(map[cellIndex] == 0.0) +// { +//// value = INT_MAX; +// u[cellIndex] = Sign(u[cellIndex])*INT_MAX; +// return 0.0; +// } +// else + value = 50.0/ map[cellIndex]; + + + RealType xb = u[cellIndex]; + RealType xf = -u[cellIndex]; + RealType yb = u[cellIndex]; + RealType yf = -u[cellIndex]; + RealType a,b,c; + + + if(coordinates.x() == mesh.getDimensions().x() - 1) + xf += u[neighbourEntities.template getEntityIndex< -1, 0 >()]; + else + xf += u[neighbourEntities.template getEntityIndex< 1, 0 >()]; + + if(coordinates.x() == 0) + xb -= u[neighbourEntities.template getEntityIndex< 1, 0 >()]; + else + xb -= u[neighbourEntities.template getEntityIndex< -1, 0 >()]; + + if(coordinates.y() == mesh.getDimensions().y() - 1) + yf += u[neighbourEntities.template getEntityIndex< 0, -1 >()]; + else + yf += u[neighbourEntities.template getEntityIndex< 0, 1 >()]; + + if(coordinates.y() == 0) + yb -= u[neighbourEntities.template getEntityIndex< 0, 1 >()]; + else + yb -= u[neighbourEntities.template getEntityIndex< 0, -1 >()]; + + + if(signui > 0.0) + { + xf = negativePart(xf); + + xb = positivePart(xb); + + yf = negativePart(yf); + + yb = positivePart(yb); + + } + else if(signui < 0.0) + { + + xb = negativePart(xb); + + xf = positivePart(xf); + + yb = negativePart(yb); + + yf = positivePart(yf); + } + + /*if(boundaryCondition &1) + yb = 0.0; + else + yf = 0.0; + + if(boundaryCondition & 2) + xb = 0.0; + else + xf = 0.0;*/ + + if(xb - xf > 0.0) + a = xb; + else + a = xf; + + if(yb - yf > 0.0) + b = yb; + else + b = yf; + + c =(value - sqrt(a*a+b*b)*ihx ); + + if(c > 0.0 ) + return Sign(u[cellIndex])*c; + else + + return signui*c; +} + + +#endif /* PARALLELGODUNOVMAP2D_IMPL_H_ */