diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e855035df443b4ee53dc64014fe5d45051777b5..41a8ed529a8444613c42caa843da5e3998a9e5ca 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ # Vladimir Klement # Jakub Klinkovsky -cmake_minimum_required( VERSION 3.0 ) +cmake_minimum_required( VERSION 3.4 ) project( tnl ) @@ -33,7 +33,6 @@ if( CMAKE_BUILD_TYPE STREQUAL "Debug") set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/lib ) set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/bin ) set( debugExt -dbg ) - set( CMAKE_CXX_FLAGS "${CXXFLAGS} -g ") else() set( PROJECT_BUILD_PATH ${PROJECT_SOURCE_DIR}/Release/src ) set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Release/tests ) @@ -41,10 +40,12 @@ else() set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/lib) set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/bin) endif() + # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) -set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -g" ) +set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG" ) +#set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) # pass -rdynamic only in Debug mode set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "" ) set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "-rdynamic" ) @@ -65,8 +66,8 @@ if( WITH_CUDA STREQUAL "yes" ) set( BUILD_CUDA TRUE) set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) set(BUILD_SHARED_LIBS ON) - set(CUDA_SEPARABLE_COMPILATION ON) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -DHAVE_CUDA ) + set(CUDA_SEPARABLE_COMPILATION ON) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ;-DHAVE_CUDA ) #AddCompilerFlag( "-DHAVE_NOT_CXX11" ) # -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 " ) set( ALL_CUDA_ARCHS -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 @@ -110,6 +111,9 @@ if( WITH_CUDA STREQUAL "yes" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} ) # TODO: this is necessary only due to a bug in cmake set( CUDA_ADD_LIBRARY_OPTIONS -shared ) + # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1) + set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -ccbin "${CMAKE_CXX_COMPILER}" ) + set( CUDA_HOST_COMPILER "" ) #### # Check for cuBLAS @@ -152,7 +156,7 @@ if( WITH_CUDA STREQUAL "yes" ) cuda_include_directories( ${CUSPARSE_INCLUDE_DIR} ) set( CUSPARSE_LIBRARY "${CUDA_cusparse_LIBRARY}" ) endif() - endif( NOT WITH_CUSPARSE STREQUAL "no" ) + endif( NOT WITH_CUSPARSE STREQUAL "no" ) endif( CUDA_FOUND ) endif( WITH_CUDA STREQUAL "yes" ) diff --git a/TODO b/TODO index 7619d268836f5bf97700a437fa452c3f83055ff9..3f427388a1ec4b4ccb2818f3532ca98bb16b0708 100644 --- a/TODO +++ b/TODO @@ -10,6 +10,11 @@ TODO: - data by se na hostu preskupila do souvisleho bloku dat a ten se prenesl najednou +TODO: + - zrejme bude potreba udrzovat ke kazdemu objektu jeho obraz na GPU/MIC + - to by zarizovala metoda syncToDevice() napr. kazdy objekt by mel promennou modified, ktera by rikala, jestli se zmenil a zda je nutne ho + prekopirovavat + TODO: - zavest namespaces @@ -21,11 +26,6 @@ TODO: CUDA unified memory se s nimi pracovat postaru - bylo by dobre to obalit unique poinetry, aby se nemusela delat dealokace rucne -TODO: shared pointery - - mohli bysme pomoci nich odstranit Shared objekty - - asi by bylo lepsi datcounter z shared pointeru primo do array a tento counter by se alokoval az po porvnim sdileni dat - - diky tomu by se array mohlo vytvaret i na gpu bez nutnosti dynamicke alokace, jen by nebylo mozne delat bind (nebo nejaky zjednoduseny) - TODO: Mesh * vsechny traits zkusit presunout do jednotneho MeshTraits, tj. temer MeshConfigTraits ale pojmenovat jako MeshTraits * omezit tnlDimesnionsTag - asi to ale nepujde diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f98373d2327cf5fa20e561c789d988a314c2de59..ba9da977671d770fc9d6fa0eaf265b01400a0549 100755 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -22,13 +22,13 @@ set (headers tnlAssert.h tnlList.h tnlList_impl.h tnlLogger.h - tnlOmp.h tnlObject.h tnlStack.h tnlStaticFor.h tnlStatistics.h tnlString.h tnlReal.h + tnlTimer.h tnlTimerCPU.h tnlTimerRT.h mfilename.h @@ -48,12 +48,12 @@ set( common_SOURCES ${CURRENT_DIR}/tnlObject.cpp ${CURRENT_DIR}/tnlStatistics.cpp ${CURRENT_DIR}/tnlString.cpp + ${CURRENT_DIR}/tnlTimer.cpp ${CURRENT_DIR}/tnlTimerCPU.cpp ${CURRENT_DIR}/mfilename.cpp ${CURRENT_DIR}/mpi-supp.cpp ${CURRENT_DIR}/tnlCuda.cpp - ${CURRENT_DIR}/tnlHost.cpp - ${CURRENT_DIR}/tnlOmp.cpp ) + ${CURRENT_DIR}/tnlHost.cpp ) IF( BUILD_CUDA ) set( tnl_core_CUDA__SOURCES diff --git a/src/core/terminal-colors.h b/src/core/terminal-colors.h index 2c67d00c51a06e57f0300f8a0b9d7a73cb61eda9..9a1d123f21a612a19c4ce00765e6bd13db6712a0 100644 --- a/src/core/terminal-colors.h +++ b/src/core/terminal-colors.h @@ -18,12 +18,13 @@ #ifndef TERMINAL_COLORS_H #define TERMINAL_COLORS_H -const std::string red( "\033[0;31m" ); -const std::string green( "\033[1;32m" ); -const std::string yellow( "\033[1;33m" ); -const std::string cyan( "\033[0;36m" ); -const std::string magenta( "\033[0;35m" ); -const std::string reset( "\033[0m" ); +const tnlString red( "\033[0;31m" ); +const tnlString green( "\033[1;32m" ); +const tnlString yellow( "\033[1;33m" ); +const tnlString cyan( "\033[0;36m" ); +const tnlString magenta( "\033[0;35m" ); +const tnlString bold(); +const tnlString reset( "\033[0m" ); #endif /* TERMINAL_COLORS_H */ diff --git a/src/core/tnlCuda.cu b/src/core/tnlCuda.cu index 4f38ceb9dcaccfe236e890778f5ec9b4ea09a24d..113cba35227a0c461f1a32dc1148cd1e9646ac0f 100644 --- a/src/core/tnlCuda.cu +++ b/src/core/tnlCuda.cu @@ -16,27 +16,30 @@ ***************************************************************************/ #include <core/tnlCuda.h> +#include <config/tnlConfigDescription.h> +#include <config/tnlParameterContainer.h> -void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) + +/*void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) { #ifdef HAVE_CUDA - //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true ); - //config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() ); + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device.", 0 ); #else - //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false ); - //config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 ); -#endif - + config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device (CUDA is not supported on this system).", 0 ); +#endif } bool tnlCuda::setup( const tnlParameterContainer& parameters, const tnlString& prefix ) { - //enable = parameters.getParameter< bool >( prefix + "omp-enabled" ); - //maxThreadsCount = parameters.getParameter< int ( prefix + "omp-max-threads" ); + int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); +#ifdef HAVE_CUDA + cudaSetDevice( cudaDevice ); + checkCudaDevice; +#endif return true; } - +*/ bool tnlCuda::checkDevice( const char* file_name, int line ) { diff --git a/src/core/tnlHost.cpp b/src/core/tnlHost.cpp index 7d53f0435cdc88f707af57ad46b0ee45b6b3d8af..639ea76eecbe38758270d950ac8dfdcba5eb9503 100644 --- a/src/core/tnlHost.cpp +++ b/src/core/tnlHost.cpp @@ -19,6 +19,15 @@ #define TNLHOSTL_H_ #include <core/tnlHost.h> +#ifdef HAVE_OPENMP +#include <omp.h> +#endif +#include <config/tnlConfigDescription.h> +#include <config/tnlParameterContainer.h> + + +bool tnlHost::ompEnabled( true ); +int tnlHost::maxThreadsCount( -1 ); tnlString tnlHost::getDeviceType() { @@ -32,4 +41,63 @@ size_t tnlHost::getFreeMemory() return pages * page_size; }; +void tnlHost::enableOMP() +{ + ompEnabled = true; +} + +void tnlHost::disableOMP() +{ + ompEnabled = false; +} + +void tnlHost::setMaxThreadsCount( int maxThreadsCount_ ) +{ + maxThreadsCount = maxThreadsCount_; +#ifdef HAVE_OPENMP + omp_set_num_threads( maxThreadsCount ); +#endif +} + +int tnlHost::getMaxThreadsCount() +{ +#ifdef HAVE_OPENMP + if( maxThreadsCount == -1 ) + return omp_get_max_threads(); + return maxThreadsCount; +#else + return 0; +#endif +} + +int tnlHost::getThreadIdx() +{ +#ifdef HAVE_OPENMP + return omp_get_thread_num(); +#else + return 0; +#endif +} + +void tnlHost::configSetup( tnlConfigDescription& config, const tnlString& prefix ) +{ +#ifdef HAVE_OPENMP + config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true ); + config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() ); +#else + config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false ); + config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 ); +#endif + +} + +bool tnlHost::setup( const tnlParameterContainer& parameters, + const tnlString& prefix ) +{ + ompEnabled = parameters.getParameter< bool >( prefix + "omp-enabled" ); + maxThreadsCount = parameters.getParameter< int >( prefix + "omp-max-threads" ); + return true; +} + + #endif /* TNLHOST_H_ */ diff --git a/src/core/tnlHost.h b/src/core/tnlHost.h index 6f921492dfb24f0b5c2f8c5106eacf2ff56252c2..23826c968e7d81b4d4bd0f0b0283ec4546dda886 100644 --- a/src/core/tnlHost.h +++ b/src/core/tnlHost.h @@ -22,20 +22,48 @@ #include <core/tnlDevice.h> #include <core/tnlString.h> +class tnlConfigDescription; +class tnlParameterContainer; + class tnlHost { public: - enum { DeviceType = tnlHostDevice }; + enum { DeviceType = tnlHostDevice }; + + static tnlString getDeviceType(); + + #ifdef HAVE_CUDA + __host__ __device__ + #endif + static inline tnlDeviceEnum getDevice() { return tnlHostDevice; }; + + static size_t getFreeMemory(); + + static void disableOMP(); + + static void enableOMP(); + + static inline bool isOMPEnabled() { return ompEnabled; }; + + static void setMaxThreadsCount( int maxThreadsCount ); + + static int getMaxThreadsCount(); + + static int getThreadIdx(); + + static void configSetup( tnlConfigDescription& config, const tnlString& prefix = "" ); + + static bool setup( const tnlParameterContainer& parameters, + const tnlString& prefix = "" ); - static tnlString getDeviceType(); + protected: + + static bool ompEnabled; + + static int maxThreadsCount; -#ifdef HAVE_CUDA - __host__ __device__ -#endif - static inline tnlDeviceEnum getDevice() { return tnlHostDevice; }; - static size_t getFreeMemory(); }; #endif /* TNLHOST_H_ */ diff --git a/src/core/tnlOmp.cpp b/src/core/tnlOmp.cpp deleted file mode 100644 index 841d9fc57b1681783fe53c097d53fd50d787200c..0000000000000000000000000000000000000000 --- a/src/core/tnlOmp.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/*************************************************************************** - tnlOmp.cpp - description - ------------------- - begin : Mar 4, 2016 - copyright : (C) 2016 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include <core/tnlOmp.h> -#ifdef HAVE_OPENMP -#include <omp.h> -#endif - -bool tnlOmp::enabled( true ); -int tnlOmp::maxThreadsCount( -1 ); - -void tnlOmp::enable() -{ - enabled = true; -} - -void tnlOmp::disable() -{ - enabled = false; -} - -void tnlOmp::setMaxThreadsCount( int maxThreadsCount_ ) -{ - maxThreadsCount = maxThreadsCount_; -} - -int tnlOmp::getMaxThreadsCount() -{ -#ifdef HAVE_OPENMP - if( maxThreadsCount == -1 ) - return omp_get_max_threads(); - return maxThreadsCount; -#else - return 0; -#endif -} - -int tnlOmp::getThreadIdx() -{ -#ifdef HAVE_OPENMP - return omp_get_thread_num(); -#else - return 0; -#endif -} - -void tnlOmp::configSetup( tnlConfigDescription& config, const tnlString& prefix ) -{ -#ifdef HAVE_OPENMP - config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true ); - config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() ); -#else - config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false ); - config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 ); -#endif - -} - -bool tnlOmp::setup( const tnlParameterContainer& parameters, - const tnlString& prefix ) -{ - enabled = parameters.getParameter< bool >( prefix + "omp-enabled" ); - maxThreadsCount = parameters.getParameter< int >( prefix + "omp-max-threads" ); - return true; -} - diff --git a/src/core/tnlTimer.cpp b/src/core/tnlTimer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..30e43f41dfb9faa83ebee6dc6b06714c589ef5f8 --- /dev/null +++ b/src/core/tnlTimer.cpp @@ -0,0 +1,152 @@ +/*************************************************************************** + tnlTimer.cpp - description + ------------------- + begin : Mar 14, 2016 + copyright : (C) 2016 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <core/tnlTimer.h> + +#include "tnlConfig.h" +#ifdef HAVE_SYS_RESOURCE_H + #include <sys/resource.h> +#endif +#ifdef HAVE_SYS_TIME_H + #include <stddef.h> + #include <sys/time.h> + #define HAVE_TIME +#endif + + +tnlTimer defaultTimer; + +tnlTimer::tnlTimer() +{ + reset(); +} + +void tnlTimer::reset() +{ + this->initialCPUTime = 0; + this->totalCPUTime = 0.0; + this->initialRealTime = 0; + this->totalRealTime = 0.0; + this->initialCPUCycles = 0; + this->totalCPUCycles = 0; + this->stopState = true; +} + +void tnlTimer::stop() +{ + + if( ! this->stopState ) + { + /**** + * Real time + */ +#ifdef HAVE_TIME + struct timeval tp; + int rtn = gettimeofday( &tp, NULL ); + this->totalRealTime += ( double ) tp. tv_sec + 1.0e-6 * ( double ) tp. tv_usec - this->initialRealTime; +#endif + + /**** + * CPU time + */ +#ifdef HAVE_SYS_RESOURCE_H + rusage initUsage; + getrusage( RUSAGE_SELF, &initUsage ); + this->totalCPUTime += initUsage. ru_utime. tv_sec + 1.0e-6 * ( double ) initUsage. ru_utime. tv_usec - this->initialCPUTime; +#endif + + /**** + * CPU cycles + */ + this->totalCPUCycles += this->rdtsc() - this->initialCPUCycles; + this->stopState = true; + } +} + +void tnlTimer::start() +{ + /**** + * Real time + */ +#ifdef HAVE_TIME + struct timeval tp; + int rtn = gettimeofday( &tp, NULL ); + this->initialRealTime = ( double ) tp. tv_sec + 1.0e-6 * ( double ) tp. tv_usec; +#endif + + /**** + * CPU Time + */ +#ifdef HAVE_SYS_RESOURCE_H + rusage initUsage; + getrusage( RUSAGE_SELF, &initUsage ); + this->initialCPUTime = initUsage. ru_utime. tv_sec + 1.0e-6 * ( double ) initUsage. ru_utime. tv_usec; +#endif + + /**** + * CPU cycles + */ + this->initialCPUCycles = this->rdtsc(); + + this->stopState = false; +} + +double tnlTimer::getRealTime() +{ +#ifdef HAVE_TIME + if( ! this->stopState ) + { + this->stop(); + this->start(); + } + return this->totalRealTime; +#else + return -1; +#endif +} + +double tnlTimer::getCPUTime() +{ +#ifdef HAVE_SYS_RESOURCE_H + if( ! this->stopState ) + { + this->stop(); + this->start(); + } + return this->totalCPUTime; +#else + return -1; +#endif +} + +unsigned long long int tnlTimer::getCPUCycles() +{ + if( ! this->stopState ) + { + this->stop(); + this->start(); + } + return this->totalCPUCycles; +} + +bool tnlTimer::writeLog( tnlLogger& logger, int logLevel ) +{ + logger.writeParameter< double >( "Real time:", this->getRealTime(), logLevel ); + logger.writeParameter< double >( "CPU time:", this->getCPUTime(), logLevel ); + logger.writeParameter< unsigned long long int >( "CPU Cycles:", this->getCPUCycles(), logLevel ); + +} diff --git a/src/core/tnlOmp.h b/src/core/tnlTimer.h similarity index 52% rename from src/core/tnlOmp.h rename to src/core/tnlTimer.h index 2308964a13d3f7eef8b3c6ae797f5e9d3b4aaddc..b72001da772626e35f7371ab9f97ba3db1f7a296 100644 --- a/src/core/tnlOmp.h +++ b/src/core/tnlTimer.h @@ -1,7 +1,7 @@ /*************************************************************************** - tnlOmp.h - description + tnlTimer.h - description ------------------- - begin : Mar 4, 2016 + begin : Mar 14, 2016 copyright : (C) 2016 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -16,40 +16,49 @@ ***************************************************************************/ -#ifndef TNLOMP_H -#define TNLOMP_H +#ifndef TNLTIMER_H +#define TNLTIMER_H -#include <config/tnlConfigDescription.h> -#include <config/tnlParameterContainer.h> +#include <core/tnlLogger.h> -class tnlOmp +class tnlTimer { - public: - - static void disable(); - - static void enable(); - - static inline bool isEnabled() { return enabled; }; - - static void setMaxThreadsCount( int maxThreadsCount ); - - static int getMaxThreadsCount(); - - static int getThreadIdx(); - - static void configSetup( tnlConfigDescription& config, const tnlString& prefix = "" ); + public: + + tnlTimer(); + + void reset(); + + void stop(); + + void start(); + + double getRealTime(); + + double getCPUTime(); + + unsigned long long int getCPUCycles(); - static bool setup( const tnlParameterContainer& parameters, - const tnlString& prefix = "" ); - + bool writeLog( tnlLogger& logger, int logLevel = 0 ); + protected: - - static bool enabled; - - static int maxThreadsCount; + + double initialRealTime, totalRealTime, + initialCPUTime, totalCPUTime; + + unsigned long long int initialCPUCycles, totalCPUCycles; + + bool stopState; + + inline unsigned long long rdtsc() + { + unsigned hi, lo; + __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); + return ( ( unsigned long long ) lo ) | ( ( ( unsigned long long ) hi ) << 32 ); + } }; +extern tnlTimer defaultTimer; -#endif /* TNLOMP_H */ +#endif /* TNLTIMER_H */ diff --git a/src/core/vectors/tnlVectorOperationsHost_impl.h b/src/core/vectors/tnlVectorOperationsHost_impl.h index cd7a4b48101f0d9c32dae37a61d5eea25ac49a7a..1ea7fd98a1b17d5a237078bc9079d8da33d2cdd8 100644 --- a/src/core/vectors/tnlVectorOperationsHost_impl.h +++ b/src/core/vectors/tnlVectorOperationsHost_impl.h @@ -18,8 +18,6 @@ #ifndef TNLVECTOROPERATIONSHOST_IMPL_H_ #define TNLVECTOROPERATIONSHOST_IMPL_H_ -#include <core/tnlOmp.h> - static const int OpenMPVectorOperationsThreshold = 65536; // TODO: check this threshold template< typename Vector > @@ -104,7 +102,7 @@ getVectorL1Norm( const Vector& v ) Real result( 0.0 ); const Index n = v. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) result += fabs( v[ i ] ); @@ -122,7 +120,7 @@ getVectorL2Norm( const Vector& v ) Real result( 0.0 ); const Index n = v. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) { @@ -151,7 +149,7 @@ getVectorLpNorm( const Vector& v, Real result( 0.0 ); const Index n = v. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) result += pow( fabs( v[ i ] ), p ); @@ -168,7 +166,7 @@ typename Vector :: RealType tnlVectorOperations< tnlHost > :: getVectorSum( cons Real result( 0.0 ); const Index n = v. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) result += v[ i ]; @@ -256,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, Real result( 0.0 ); const Index n = v1. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) result += fabs( v1[ i ] - v2[ i ] ); @@ -278,7 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1, Real result( 0.0 ); const Index n = v1. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) { @@ -312,7 +310,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, Real result( 0.0 ); const Index n = v1. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) result += pow( fabs( v1. getElement( i ) - v2. getElement( i ) ), p ); @@ -332,7 +330,7 @@ typename Vector1::RealType tnlVectorOperations< tnlHost > :: getVectorDifference Real result( 0.0 ); const Index n = v1. getSize(); #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) result += v1. getElement( i ) - v2. getElement( i ); @@ -371,7 +369,7 @@ typename Vector1 :: RealType tnlVectorOperations< tnlHost > :: getScalarProduct( Real result( 0.0 ); const Index n = v1. getSize(); #ifdef HAVE_OPENMP - #pragma omp parallel for reduction(+:result) if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold + #pragma omp parallel for reduction(+:result) if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i++ ) result += v1[ i ] * v2[ i ]; @@ -411,13 +409,13 @@ void tnlVectorOperations< tnlHost > :: addVector( Vector1& y, const Index n = y. getSize(); if( thisMultiplicator == 1.0 ) #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) y[ i ] += alpha * x[ i ]; else #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; @@ -445,13 +443,13 @@ addVectors( Vector1& v, const Index n = v.getSize(); if( thisMultiplicator == 1.0 ) #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) v[ i ] += multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; else #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold +#pragma omp parallel for if( tnlHost::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold #endif for( Index i = 0; i < n; i ++ ) v[ i ] = thisMultiplicator * v[ i ] * multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; diff --git a/src/functions/tnlMeshFunction.h b/src/functions/tnlMeshFunction.h index add52cc1c939d2b202206e15a949f1879285d050..e86aca9d3260e1cb33e9827b7b3cafd87a232c73 100644 --- a/src/functions/tnlMeshFunction.h +++ b/src/functions/tnlMeshFunction.h @@ -66,6 +66,8 @@ class tnlMeshFunction : bool setup( const tnlParameterContainer& parameters, const tnlString& prefix = "" ); + void bind( ThisType& meshFunction ); + template< typename Vector > void bind( const MeshType& mesh, const Vector& data, diff --git a/src/functions/tnlMeshFunction_impl.h b/src/functions/tnlMeshFunction_impl.h index 2b08d2f2927c25def185d48b1084a40299379331..5fe48a66259e5a5c6e702a918482fe95c0af1248 100644 --- a/src/functions/tnlMeshFunction_impl.h +++ b/src/functions/tnlMeshFunction_impl.h @@ -133,6 +133,17 @@ setup( const tnlParameterContainer& parameters, return true; } +template< typename Mesh, + int MeshEntityDimensions, + typename Real > +void +tnlMeshFunction< Mesh, MeshEntityDimensions, Real >:: +bind( tnlMeshFunction< Mesh, MeshEntityDimensions, Real >& meshFunction ) +{ + this->mesh = meshFunction.getMesh(); + this->data.bind( meshFunction.getData() ); +} + template< typename Mesh, int MeshEntityDimensions, typename Real > diff --git a/src/functions/tnlTestFunction_impl.h b/src/functions/tnlTestFunction_impl.h index 46f70b3e4fa6bf5b4717702c176b201dc7c49e8b..32271a80d9f85abbb016ccdea29db394e447f293 100644 --- a/src/functions/tnlTestFunction_impl.h +++ b/src/functions/tnlTestFunction_impl.h @@ -291,31 +291,31 @@ getPartialDerivative( const VertexType& vertex, { case constant: return scale * ( ( tnlConstantFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case expBump: return scale * ( ( tnlExpBumpFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case sinBumps: return scale * ( ( tnlSinBumpsFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case sinWave: return scale * ( ( tnlSinWaveFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case cylinder: return scale * ( ( tnlCylinderFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case flowerpot: return scale * ( ( tnlFlowerpotFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case twins: return scale * ( ( tnlTwinsFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case pseudoSquare: return scale * ( ( tnlPseudoSquareFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); case blob: return scale * ( ( tnlBlobFunction< Dimensions, Real >* ) function )-> - getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); + template getPartialDerivative< XDiffOrder, YDiffOrder, ZDiffOrder >( vertex, time ); default: return 0.0; } @@ -476,17 +476,17 @@ tnlTestFunction< FunctionDimensions, Real, Device >:: printFunction( ostream& str ) const { FunctionType* f = ( FunctionType* ) this->function; - switch( Device::DeviceType ) + if( std::is_same< Device, tnlHost >::value ) { - case tnlHostDevice: - str << *f; - return str; - case tnlCudaDevice: - tnlCuda::print( f, str ); - return str; - default: - return str; + str << *f; + return str; } + if( std::is_same< Device, tnlCuda >::value ) + { + tnlCuda::print( f, str ); + return str; + } + return str; } template< int FunctionDimensions, diff --git a/src/matrices/tnlCSRMatrix_impl.h b/src/matrices/tnlCSRMatrix_impl.h index e441b04685d50859a1e0c750ba378a25d65e4949..6622a0e0d55e8a04e38a971fa5834d06221e2f1d 100644 --- a/src/matrices/tnlCSRMatrix_impl.h +++ b/src/matrices/tnlCSRMatrix_impl.h @@ -726,7 +726,7 @@ class tnlCSRMatrixDeviceDependentCode< tnlHost > const InVector* inVectorPtr = &inVector; OutVector* outVectorPtr = &outVector; #ifdef HAVE_OPENMP -#pragma omp parallel for private( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( tnlOmp::isEnabled() ) +#pragma omp parallel for firstprivate( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( tnlHost::isOMPEnabled() ) #endif for( Index row = 0; row < rows; row ++ ) ( *outVectorPtr )[ row ] = matrixPtr->rowVectorProduct( row, *inVectorPtr ); diff --git a/src/matrices/tnlDenseMatrix_impl.h b/src/matrices/tnlDenseMatrix_impl.h index d20c9512e0384921a1cbf3be1a8538e9115f1546..4e78c369d01cda60d87cf2dbb3cd2d21789ba7d4 100644 --- a/src/matrices/tnlDenseMatrix_impl.h +++ b/src/matrices/tnlDenseMatrix_impl.h @@ -932,7 +932,7 @@ class tnlDenseMatrixDeviceDependentCode< tnlHost > OutVector& outVector ) { #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() ) +#pragma omp parallel for if( tnlHost::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); diff --git a/src/matrices/tnlEllpackMatrix_impl.h b/src/matrices/tnlEllpackMatrix_impl.h index b56bdfaf38c627fd81672eebf7dfbdbbe7ff8af7..6061ac0f22ead22825ae46ea4bc72524779860bd 100644 --- a/src/matrices/tnlEllpackMatrix_impl.h +++ b/src/matrices/tnlEllpackMatrix_impl.h @@ -688,7 +688,7 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost > OutVector& outVector ) { #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() ) +#pragma omp parallel for if( tnlHost::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); diff --git a/src/matrices/tnlMultidiagonalMatrix_impl.h b/src/matrices/tnlMultidiagonalMatrix_impl.h index ae89ffa4335c444cd86c2ba52d50c4c272f68d8a..2c8a92d0a7b1b3f2db246d7817f0a490b1e6f7cd 100644 --- a/src/matrices/tnlMultidiagonalMatrix_impl.h +++ b/src/matrices/tnlMultidiagonalMatrix_impl.h @@ -735,7 +735,7 @@ class tnlMultidiagonalMatrixDeviceDependentCode< tnlHost > OutVector& outVector ) { #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() ) +#pragma omp parallel for if( tnlHost::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); diff --git a/src/matrices/tnlSlicedEllpackMatrix_impl.h b/src/matrices/tnlSlicedEllpackMatrix_impl.h index dd0c6ed78e3902c5ff351965b0c7e4bac18f4049..b047ec08b3891c8ed6d6427cec95f8243424efbc 100644 --- a/src/matrices/tnlSlicedEllpackMatrix_impl.h +++ b/src/matrices/tnlSlicedEllpackMatrix_impl.h @@ -756,7 +756,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > OutVector& outVector ) { #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() ) +#pragma omp parallel for if( tnlHost::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); diff --git a/src/matrices/tnlTridiagonalMatrix_impl.h b/src/matrices/tnlTridiagonalMatrix_impl.h index 9857c0b172bce4c635c7ef88269464316fb1a161..3cb917dc81849e2ce12fa4e40a3c33945d2a332a 100644 --- a/src/matrices/tnlTridiagonalMatrix_impl.h +++ b/src/matrices/tnlTridiagonalMatrix_impl.h @@ -654,7 +654,7 @@ class tnlTridiagonalMatrixDeviceDependentCode< tnlHost > OutVector& outVector ) { #ifdef HAVE_OPENMP -#pragma omp parallel for if( tnlOmp::isEnabled() ) +#pragma omp parallel for if( tnlHost::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); diff --git a/src/mesh/grids/tnlGridTraverser.h b/src/mesh/grids/tnlGridTraverser.h index 8b05a88e4eeec5212c3b8a67f33c92f4372a0ca4..8427932dfd340d80ac60187e72522222531baf90 100644 --- a/src/mesh/grids/tnlGridTraverser.h +++ b/src/mesh/grids/tnlGridTraverser.h @@ -112,8 +112,8 @@ class tnlGridTraverser< tnlGrid< 2, Real, tnlHost, Index > > static void processEntities( const GridType& grid, - const CoordinatesType& begin, - const CoordinatesType& end, + const CoordinatesType begin, + const CoordinatesType end, const CoordinatesType& entityOrientation, const CoordinatesType& entityBasis, UserData& userData ); diff --git a/src/mesh/grids/tnlGridTraverser_impl.h b/src/mesh/grids/tnlGridTraverser_impl.h index 93a6fdac22bae92f285b42c94dd559ead740eb25..8fd55207570d47efd120e128a7ef1d1dbea51044 100644 --- a/src/mesh/grids/tnlGridTraverser_impl.h +++ b/src/mesh/grids/tnlGridTraverser_impl.h @@ -38,10 +38,11 @@ processEntities( const CoordinatesType& entityBasis, UserData& userData ) { + + GridEntity entity( grid ); entity.setOrientation( entityOrientation ); entity.setBasis( entityBasis ); - if( processOnlyBoundaryEntities ) { entity.getCoordinates() = begin; @@ -71,8 +72,7 @@ template< typename Real, typename Index, typename GridEntity, typename UserData, - typename EntitiesProcessor, - bool processOnlyBoundaryEntities > + typename EntitiesProcessor > __global__ void tnlGridTraverser1D( const tnlGrid< 1, Real, tnlCuda, Index >* grid, @@ -94,13 +94,49 @@ tnlGridTraverser1D( if( coordinates.x() <= end->x() ) { - if( ! processOnlyBoundaryEntities || entity.isBoundaryEntity() ) + //if( ! processOnlyBoundaryEntities || entity.isBoundaryEntity() ) { entity.refresh(); EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity ); } } } + +template< typename Real, + typename Index, + typename GridEntity, + typename UserData, + typename EntitiesProcessor > +__global__ void +tnlGridBoundaryTraverser1D( + const tnlGrid< 1, Real, tnlCuda, Index >* grid, + UserData* userData, + const typename GridEntity::CoordinatesType* begin, + const typename GridEntity::CoordinatesType* end, + const typename GridEntity::CoordinatesType* entityOrientation, + const typename GridEntity::CoordinatesType* entityBasis ) +{ + typedef Real RealType; + typedef Index IndexType; + typedef tnlGrid< 1, Real, tnlCuda, Index > GridType; + typename GridType::CoordinatesType coordinates; + + if( threadIdx.x == 0 ) + { + coordinates.x() = begin->x(); + GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis ); + entity.refresh(); + EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity ); + } + if( threadIdx.x == 1 ) + { + coordinates.x() = end->x(); + GridEntity entity( *grid, coordinates, *entityOrientation, *entityBasis ); + entity.refresh(); + EntitiesProcessor::processEntity( entity.getMesh(), *userData, entity ); + } +} + #endif template< typename Real, @@ -128,21 +164,37 @@ processEntities( typename GridEntity::MeshType* kernelGrid = tnlCuda::passToDevice( grid ); UserData* kernelUserData = tnlCuda::passToDevice( userData ); - dim3 cudaBlockSize( 256 ); - dim3 cudaBlocks; - cudaBlocks.x = tnlCuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); + if( processOnlyBoundaryEntities ) + { + dim3 cudaBlockSize( 2 ); + dim3 cudaBlocks( 1 ); + tnlGridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > + <<< cudaBlocks, cudaBlockSize >>> + ( kernelGrid, + kernelUserData, + kernelBegin, + kernelEnd, + kernelEntityOrientation, + kernelEntityBasis ); + } + else + { + dim3 cudaBlockSize( 256 ); + dim3 cudaBlocks; + cudaBlocks.x = tnlCuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); + const IndexType cudaXGrids = tnlCuda::getNumberOfGrids( cudaBlocks.x ); - for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) - tnlGridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities > - <<< cudaBlocks, cudaBlockSize >>> - ( kernelGrid, - kernelUserData, - kernelBegin, - kernelEnd, - kernelEntityOrientation, - kernelEntityBasis, - gridXIdx ); + for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) + tnlGridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > + <<< cudaBlocks, cudaBlockSize >>> + ( kernelGrid, + kernelUserData, + kernelBegin, + kernelEnd, + kernelEntityOrientation, + kernelEntityBasis, + gridXIdx ); + } cudaThreadSynchronize(); checkCudaDevice; tnlCuda::freeFromDevice( kernelGrid ); @@ -172,8 +224,8 @@ void tnlGridTraverser< tnlGrid< 2, Real, tnlHost, Index > >:: processEntities( const GridType& grid, - const CoordinatesType& begin, - const CoordinatesType& end, + const CoordinatesType begin, + const CoordinatesType end, const CoordinatesType& entityOrientation, const CoordinatesType& entityBasis, UserData& userData ) @@ -211,6 +263,7 @@ processEntities( } else { +//#pragma omp parallel for firstprivate( entity, begin, end ) if( tnlHost::isOMPEnabled() ) for( entity.getCoordinates().y() = begin.y(); entity.getCoordinates().y() <= end.y(); entity.getCoordinates().y() ++ ) @@ -314,6 +367,7 @@ processEntities( kernelEntityBasis, gridXIdx, gridYIdx ); + cudaThreadSynchronize(); checkCudaDevice; tnlCuda::freeFromDevice( kernelGrid ); diff --git a/src/operators/CMakeLists.txt b/src/operators/CMakeLists.txt index 87ce7309512ca0b3bbb1bad46a2655eb5b5e7dcf..3ab7e12a6678cd719ce4c1e33bf14bdf104ac72f 100755 --- a/src/operators/CMakeLists.txt +++ b/src/operators/CMakeLists.txt @@ -5,7 +5,6 @@ ADD_SUBDIRECTORY( operator-Q ) ADD_SUBDIRECTORY( operator-curvature ) SET( headers tnlDirichletBoundaryConditions.h - tnlDirichletBoundaryConditions_impl.h tnlExactFunctionInverseOperator.h tnlExactIdentityOperator.h tnlExactOperatorComposition.h diff --git a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h index af39c588118eb976eec6425cd2c9d068d530c5e5..9b16952d403e5b707a52c41983b40c4de4085034 100644 --- a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h +++ b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator.h @@ -52,7 +52,7 @@ class tnlFiniteVolumeNonlinearOperator< tnlGrid< 1,MeshReal, Device, MeshIndex > typename Vector, typename Matrix > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -106,7 +106,7 @@ class tnlFiniteVolumeNonlinearOperator< tnlGrid< 2, MeshReal, Device, MeshIndex typename Vector, typename Matrix > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -158,7 +158,7 @@ class tnlFiniteVolumeNonlinearOperator< tnlGrid< 3, MeshReal, Device, MeshIndex typename Vector, typename Matrix > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, diff --git a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h index 9bd82a6965d7253712ac51446a31577a95f3df22..2ab2cf18c7b373c460b2e9c566e9978c89bf716c 100644 --- a/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h +++ b/src/operators/diffusion/nonlinear-diffusion-operators/tnlFiniteVolumeNonlinearOperator_impl.h @@ -71,7 +71,7 @@ template< typename MeshEntity, __cuda_callable__ void tnlFiniteVolumeNonlinearOperator< tnlGrid< 1, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -155,7 +155,7 @@ template< typename MeshEntity, __cuda_callable__ void tnlFiniteVolumeNonlinearOperator< tnlGrid< 2, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -268,7 +268,7 @@ __cuda_callable__ #endif void tnlFiniteVolumeNonlinearOperator< tnlGrid< 3, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, diff --git a/src/operators/diffusion/tnlExactMeanCurvature.h b/src/operators/diffusion/tnlExactMeanCurvature.h index bb4b3d67fb798ad1c8d2a5605cea77c532528d12..19506f8382db826d52fa016399c194431854379d 100644 --- a/src/operators/diffusion/tnlExactMeanCurvature.h +++ b/src/operators/diffusion/tnlExactMeanCurvature.h @@ -44,7 +44,7 @@ class tnlExactMeanCurvature template< typename Real > void setRegularizationEpsilon( const Real& eps) { - nonlinearDiffusion.getNonlinearity().getInnerOperator().setRegularizationEpislon( eps ); + nonlinearDiffusion.getNonlinearity().getInnerOperator().setRegularizationEpsilon( eps ); } template< typename Function > diff --git a/src/operators/diffusion/tnlExactNonlinearDiffusion.h b/src/operators/diffusion/tnlExactNonlinearDiffusion.h index fafc9656d465a9293ea18a0e0902b3d20184d101..87403c37446355e7f0392ebcfe5f67a692f8371b 100644 --- a/src/operators/diffusion/tnlExactNonlinearDiffusion.h +++ b/src/operators/diffusion/tnlExactNonlinearDiffusion.h @@ -73,7 +73,7 @@ class tnlExactNonlinearDiffusion< 1, Nonlinearity, InnerOperator > const RealType u_xx = innerOperator.template getPartialDerivative< Function, 2, 0, 0 >( function, v, time ); const RealType g = nonlinearity( function, v, time ); const RealType g_x = nonlinearity.template getPartialDerivative< Function, 1, 0, 0 >( function, v, time ); - return u_xx + u_x * g_x / g; + return u_xx * g + u_x * g_x; } protected: @@ -131,7 +131,7 @@ class tnlExactNonlinearDiffusion< 2, Nonlinearity, InnerOperator > const RealType g_x = nonlinearity.template getPartialDerivative< Function, 1, 0, 0 >( function, v, time ); const RealType g_y = nonlinearity.template getPartialDerivative< Function, 0, 1, 0 >( function, v, time ); - return u_xx + u_yy + ( g_x * u_x + g_y * u_y ) / g; + return ( u_xx + u_yy ) * g + g_x * u_x + g_y * u_y; } protected: @@ -193,7 +193,7 @@ class tnlExactNonlinearDiffusion< 3, Nonlinearity, InnerOperator > const RealType g_y = nonlinearity.template getPartialDerivative< Function, 0, 1, 0 >( function, v, time ); const RealType g_z = nonlinearity.template getPartialDerivative< Function, 0, 0, 1 >( function, v, time ); - return u_xx + u_yy + u_zz + ( g_x * u_x + g_y * u_y + g_z * u_z ) / g; + return ( u_xx + u_yy + u_zz ) * g + g_x * u_x + g_y * u_y + g_z * u_z; } protected: diff --git a/src/operators/diffusion/tnlLinearDiffusion.h b/src/operators/diffusion/tnlLinearDiffusion.h index b4683676e673a985b0a233f5a3f016b9f8bf7f4e..9c63d19b641e79c4a8c282154e74273e1f1c80bd 100644 --- a/src/operators/diffusion/tnlLinearDiffusion.h +++ b/src/operators/diffusion/tnlLinearDiffusion.h @@ -55,12 +55,10 @@ class tnlLinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index static constexpr int getMeshDimensions() { return Dimensions; } - template< int EntityDimensions = Dimensions > - using MeshFunction = tnlMeshFunction< MeshType, EntityDimensions >; - static tnlString getType(); - template< typename PreimageFunction, typename MeshEntity > + template< typename PreimageFunction, + typename MeshEntity > __cuda_callable__ inline Real operator()( const PreimageFunction& u, const MeshEntity& entity, @@ -72,19 +70,17 @@ class tnlLinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index const IndexType& index, const MeshEntity& entity ) const; - template< typename MeshEntity, - typename Vector, - typename MatrixRow > + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ - inline void updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const MeshEntity& entity, - const MeshFunction< 1 >& u, - Vector& b, - MatrixRow& matrixRow ) const; - + inline void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const; }; @@ -110,10 +106,6 @@ class tnlLinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index static constexpr int getMeshDimensions() { return Dimensions; } - - template< int EntityDimensions = Dimensions > - using MeshFunction = tnlMeshFunction< MeshType, EntityDimensions >; - static tnlString getType(); template< typename PreimageFunction, typename EntityType > @@ -127,19 +119,18 @@ class tnlLinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index inline Index getLinearSystemRowLength( const MeshType& mesh, const IndexType& index, const EntityType& entity ) const; - - template< typename Vector, - typename MatrixRow, - typename EntityType > + + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ - inline void updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction< 2 >& u, - Vector& b, - MatrixRow& matrixRow ) const; + inline void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const; }; @@ -165,10 +156,6 @@ class tnlLinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index static constexpr int getMeshDimensions() { return Dimensions; } - template< int EntityDimensions = Dimensions > - using MeshFunction = tnlMeshFunction< MeshType, EntityDimensions >; - - static tnlString getType(); template< typename PreimageFunction, @@ -184,19 +171,17 @@ class tnlLinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index const IndexType& index, const EntityType& entity ) const; - template< typename Vector, - typename MatrixRow, - typename EntityType > + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ - inline void updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction< 3 >& u, - Vector& b, - MatrixRow& matrixRow ) const; - + inline void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const; }; diff --git a/src/operators/diffusion/tnlLinearDiffusion_impl.h b/src/operators/diffusion/tnlLinearDiffusion_impl.h index ef38d2e8b0f65119b6769fb778b9475df777ec4d..04e4177f6a1b3950912d70926fade6d7d17724a1 100644 --- a/src/operators/diffusion/tnlLinearDiffusion_impl.h +++ b/src/operators/diffusion/tnlLinearDiffusion_impl.h @@ -82,25 +82,27 @@ template< typename MeshReal, typename MeshIndex, typename Real, typename Index > - template< typename MeshEntity, - typename Vector, - typename Matrix > + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ inline void tnlLinearDiffusion< tnlGrid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const MeshEntity& entity, - const MeshFunction< 1 >& u, - Vector& b, - Matrix& matrix ) const +setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const { + static_assert( MeshEntity::entityDimensions == 1, "Wrong mesh entity dimensions." ); + static_assert( PreimageFunction::getEntitiesDimensions() == 1, "Wrong preimage function" ); const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); - const RealType lambdaX = tau * mesh.template getSpaceStepsProducts< -2 >(); + const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2 >(); matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1 >(), - lambdaX ); matrixRow.setElement( 1, index, 2.0 * lambdaX ); matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< 1 >(), - lambdaX ); @@ -171,26 +173,28 @@ template< typename MeshReal, typename MeshIndex, typename Real, typename Index > - template< typename Vector, + template< typename PreimageFunction, + typename MeshEntity, typename Matrix, - typename EntityType > + typename Vector > __cuda_callable__ inline void tnlLinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction< 2 >& u, - Vector& b, - Matrix& matrix ) const +setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const { + static_assert( MeshEntity::entityDimensions == 2, "Wrong mesh entity dimensions." ); + static_assert( PreimageFunction::getEntitiesDimensions() == 2, "Wrong preimage function" ); + const IndexType& index = entity.getIndex(); typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); - const RealType lambdaX = tau * mesh.template getSpaceStepsProducts< -2, 0 >(); - const RealType lambdaY = tau * mesh.template getSpaceStepsProducts< 0, -2 >(); - const typename EntityType::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); + const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); + const RealType lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); + const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1 >(), -lambdaY ); matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< -1, 0 >(), -lambdaX ); matrixRow.setElement( 2, index, 2.0 * ( lambdaX + lambdaY ) ); @@ -266,27 +270,29 @@ template< typename MeshReal, typename MeshIndex, typename Real, typename Index > - template< typename Vector, + template< typename PreimageFunction, + typename MeshEntity, typename Matrix, - typename EntityType > + typename Vector > __cuda_callable__ inline void tnlLinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction< 3 >& u, - Vector& b, - Matrix& matrix ) const +setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const { - const typename EntityType::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); + static_assert( MeshEntity::entityDimensions == 3, "Wrong mesh entity dimensions." ); + static_assert( PreimageFunction::getEntitiesDimensions() == 3, "Wrong preimage function" ); + const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); - const RealType lambdaX = tau * mesh.template getSpaceStepsProducts< -2, 0, 0 >(); - const RealType lambdaY = tau * mesh.template getSpaceStepsProducts< 0, -2, 0 >(); - const RealType lambdaZ = tau * mesh.template getSpaceStepsProducts< 0, 0, -2 >(); + const RealType lambdaX = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >(); + const RealType lambdaY = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >(); + const RealType lambdaZ = tau * entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >(); matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, 0, -1 >(), -lambdaZ ); matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, -1, 0 >(), -lambdaY ); matrixRow.setElement( 2, neighbourEntities.template getEntityIndex< -1, 0, 0 >(), -lambdaX ); diff --git a/src/operators/diffusion/tnlNonlinearDiffusion_impl.h b/src/operators/diffusion/tnlNonlinearDiffusion_impl.h index 88f0bae7ea1b840b24c3f37e73f5a2ba4874a69d..be5e87d68c57ce72093beb484aacb0ed729b070d 100644 --- a/src/operators/diffusion/tnlNonlinearDiffusion_impl.h +++ b/src/operators/diffusion/tnlNonlinearDiffusion_impl.h @@ -71,7 +71,7 @@ template< typename MeshEntity, __cuda_callable__ void tnlNonlinearDiffusion< tnlGrid< 1, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -80,7 +80,7 @@ updateLinearSystem( const RealType& time, Vector& b, Matrix& matrix ) const { - nonlinearDiffusionOperator.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix ); + nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } template< typename MeshReal, @@ -148,7 +148,7 @@ template< typename MeshEntity, __cuda_callable__ void tnlNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -157,7 +157,7 @@ updateLinearSystem( const RealType& time, Vector& b, Matrix& matrix ) const { - nonlinearDiffusionOperator.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix ); + nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } template< typename MeshReal, @@ -225,7 +225,7 @@ template< typename MeshEntity, __cuda_callable__ void tnlNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -234,7 +234,7 @@ updateLinearSystem( const RealType& time, Vector& b, Matrix& matrix ) const { - nonlinearDiffusionOperator.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix ); + nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } #endif /* TNLNONLINEARDIFFUSION_IMPL_H */ diff --git a/src/operators/diffusion/tnlOneSidedMeanCurvature.h b/src/operators/diffusion/tnlOneSidedMeanCurvature.h index 87386d89d8fe0145235e6b6c9f09b9226f536bbb..a75f0f855257f3bdb8b8665b1c5c46bb53241579 100644 --- a/src/operators/diffusion/tnlOneSidedMeanCurvature.h +++ b/src/operators/diffusion/tnlOneSidedMeanCurvature.h @@ -106,7 +106,7 @@ class tnlOneSidedMeanCurvature typename Vector, typename Matrix > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -115,7 +115,7 @@ class tnlOneSidedMeanCurvature Vector& b, Matrix& matrix ) const { - this->nonlinearDiffusion.updateLinearSystem( time, tau, mesh, index, entity, u, b, matrix ); + this->nonlinearDiffusion.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } protected: diff --git a/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h b/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h index 1eb186f934d4d7b6f705a53ec2518d11700a9715..e6f62e3edfda873916198ce45f8bcec8609361b2 100644 --- a/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h +++ b/src/operators/diffusion/tnlOneSidedNonlinearDiffusion.h @@ -71,7 +71,7 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, N { const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); const typename MeshEntity::MeshType& mesh = entity.getMesh(); - const RealType& hx_div = mesh.template getSpaceStepsProducts< -2 >(); + const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2 >(); const IndexType& center = entity.getIndex(); const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); @@ -92,26 +92,24 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 1,MeshReal, Device, MeshIndex >, N return 3; } - template< typename MeshEntity, - typename MeshFunction, - typename Vector, - typename Matrix > + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ - void updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const MeshEntity& entity, - const MeshFunction& u, - Vector& b, - Matrix& matrix ) const + inline void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const { typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); const typename MeshEntity::template NeighbourEntities< 1 >& neighbourEntities = entity.getNeighbourEntities(); const IndexType& center = entity.getIndex(); const IndexType& east = neighbourEntities.template getEntityIndex< 1 >(); const IndexType& west = neighbourEntities.template getEntityIndex< -1 >(); - const RealType lambda_x = tau * mesh.template getSpaceStepsProducts< -2 >(); + const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2 >(); const RealType& nonlinearity_center = this->nonlinearity[ center ]; const RealType& nonlinearity_west = this->nonlinearity[ west ]; const RealType aCoef = -lambda_x * nonlinearity_west; @@ -167,8 +165,8 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, { const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); const typename MeshEntity::MeshType& mesh = entity.getMesh(); - const RealType& hx_div = mesh.template getSpaceStepsProducts< -2, 0 >(); - const RealType& hy_div = mesh.template getSpaceStepsProducts< 0, -2 >(); + const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); + const RealType& hy_div = entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); const IndexType& center = entity.getIndex(); const IndexType& east = neighbourEntities.template getEntityIndex< 1, 0 >(); const IndexType& west = neighbourEntities.template getEntityIndex< -1, 0 >(); @@ -194,19 +192,17 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, return 5; } - template< typename MeshEntity, - typename MeshFunction, - typename Vector, - typename Matrix > + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ - void updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const MeshEntity& entity, - const MeshFunction& u, - Vector& b, - Matrix& matrix ) const + inline void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const { typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); const typename MeshEntity::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); @@ -215,8 +211,8 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 2, MeshReal, Device, MeshIndex >, const IndexType& west = neighbourEntities.template getEntityIndex< -1, 0 >(); const IndexType& north = neighbourEntities.template getEntityIndex< 0, 1 >(); const IndexType& south = neighbourEntities.template getEntityIndex< 0, -1 >(); - const RealType lambda_x = tau * mesh.template getSpaceStepsProducts< -2, 0 >(); - const RealType lambda_y = tau * mesh.template getSpaceStepsProducts< 0, -2 >(); + const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); + const RealType lambda_y = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); const RealType& nonlinearity_center = this->nonlinearity[ center ]; const RealType& nonlinearity_west = this->nonlinearity[ west ]; const RealType& nonlinearity_south = this->nonlinearity[ south ]; @@ -278,9 +274,9 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, { const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); const typename MeshEntity::MeshType& mesh = entity.getMesh(); - const RealType& hx_div = mesh.template getSpaceStepsProducts< -2, 0, 0 >(); - const RealType& hy_div = mesh.template getSpaceStepsProducts< 0, -2, 0 >(); - const RealType& hz_div = mesh.template getSpaceStepsProducts< 0, 0, -2 >(); + const RealType& hx_div = entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >(); + const RealType& hy_div = entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >(); + const RealType& hz_div = entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >(); const IndexType& center = entity.getIndex(); const IndexType& east = neighbourEntities.template getEntityIndex< 1, 0, 0 >(); const IndexType& west = neighbourEntities.template getEntityIndex< -1, 0, 0 >(); @@ -313,19 +309,17 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, return 7; } - template< typename MeshEntity, - typename MeshFunction, - typename Vector, - typename Matrix > + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > __cuda_callable__ - void updateLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - const IndexType& index, - const MeshEntity& entity, - const MeshFunction& u, - Vector& b, - Matrix& matrix ) const + inline void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const { typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); const typename MeshEntity::template NeighbourEntities< 3 >& neighbourEntities = entity.getNeighbourEntities(); @@ -338,9 +332,9 @@ class tnlOneSidedNonlinearDiffusion< tnlGrid< 3, MeshReal, Device, MeshIndex >, const IndexType& down = neighbourEntities.template getEntityIndex< 0, 0, -1 >(); - const RealType lambda_x = tau * mesh.template getSpaceStepsProducts< -2, 0, 0 >(); - const RealType lambda_y = tau * mesh.template getSpaceStepsProducts< 0, -2, 0 >(); - const RealType lambda_z = tau * mesh.template getSpaceStepsProducts< 0, 0, -2 >(); + const RealType lambda_x = tau * entity.getMesh().template getSpaceStepsProducts< -2, 0, 0 >(); + const RealType lambda_y = tau * entity.getMesh().template getSpaceStepsProducts< 0, -2, 0 >(); + const RealType lambda_z = tau * entity.getMesh().template getSpaceStepsProducts< 0, 0, -2 >(); const RealType& nonlinearity_center = this->nonlinearity[ center ]; const RealType& nonlinearity_west = this->nonlinearity[ west ]; const RealType& nonlinearity_south = this->nonlinearity[ south ]; diff --git a/src/operators/geometric/tnlExactGradientNorm.h b/src/operators/geometric/tnlExactGradientNorm.h index 42af00e87e9d86fa80d3fdbc5fed18237c5a6df3..1b03c3cf12670d7a3826be7f70adc62908939a31 100644 --- a/src/operators/geometric/tnlExactGradientNorm.h +++ b/src/operators/geometric/tnlExactGradientNorm.h @@ -45,7 +45,7 @@ class tnlExactGradientNorm< 1, Real > tnlExactGradientNorm() : epsilonSquare( 0.0 ){}; - void setRegularization( const Real& epsilon ) + void setRegularizationEpsilon( const Real& epsilon ) { this->epsilonSquare = epsilon*epsilon; } @@ -113,7 +113,7 @@ class tnlExactGradientNorm< 2, Real > tnlExactGradientNorm() : epsilonSquare( 0.0 ){}; - void setRegularization( const Real& epsilon ) + void setRegularizationEpsilon( const Real& epsilon ) { this->epsilonSquare = epsilon*epsilon; } @@ -187,7 +187,7 @@ class tnlExactGradientNorm< 3, Real > tnlExactGradientNorm() : epsilonSquare( 0.0 ){}; - void setRegularization( const Real& epsilon ) + void setRegularizationEpsilon( const Real& epsilon ) { this->epsilonSquare = epsilon*epsilon; } diff --git a/src/operators/tnlDirichletBoundaryConditions.h b/src/operators/tnlDirichletBoundaryConditions.h index 01f55a1e8e6861f2db5d0a8f6458acbd70ee284b..b155bb7d964cd2d1c98bccb24470c79d69c6ecff 100644 --- a/src/operators/tnlDirichletBoundaryConditions.h +++ b/src/operators/tnlDirichletBoundaryConditions.h @@ -20,6 +20,7 @@ #include <operators/tnlOperator.h> #include <functions/tnlConstantFunction.h> +#include <functions/tnlFunctionAdapter.h> template< typename Mesh, typename Function = tnlConstantFunction< Mesh::getMeshDimensions(), typename Mesh::RealType >, @@ -36,57 +37,86 @@ class tnlDirichletBoundaryConditions { public: - typedef Mesh MeshType; - typedef Function FunctionType; - typedef Real RealType; - typedef typename MeshType::DeviceType DeviceType; - typedef Index IndexType; - - typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; - typedef typename MeshType::VertexType VertexType; - - static constexpr int getMeshDimensions() { return MeshType::meshDimensions; } + typedef Mesh MeshType; + typedef Function FunctionType; + typedef Real RealType; + typedef typename MeshType::DeviceType DeviceType; + typedef Index IndexType; - static void configSetup( tnlConfigDescription& config, - const tnlString& prefix = "" ); + typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; + typedef typename MeshType::VertexType VertexType; - bool setup( const tnlParameterContainer& parameters, - const tnlString& prefix = "" ); + static constexpr int getMeshDimensions() { return MeshType::meshDimensions; } - void setFunction( const Function& function ); - - Function& getFunction(); + static void configSetup( tnlConfigDescription& config, + const tnlString& prefix = "" ) + { + Function::configSetup( config, prefix ); + } + + bool setup( const tnlParameterContainer& parameters, + const tnlString& prefix = "" ) + { + return this->function.setup( parameters, prefix ); + } - const Function& getFunction() const; - - template< typename EntityType, - typename MeshFunction > - __cuda_callable__ - const RealType operator()( const MeshFunction& u, - const EntityType& entity, - const RealType& time = 0 ) const; + void setFunction( const Function& function ) + { + this->function = function; + } + + Function& getFunction() + { + return this->function; + } + + const Function& getFunction() const + { + return this->function; + } + + template< typename EntityType, + typename MeshFunction > + __cuda_callable__ + const RealType operator()( const MeshFunction& u, + const EntityType& entity, + const RealType& time = 0 ) const + { + //static_assert( EntityType::getDimensions() == MeshEntitiesDimensions, "Wrong mesh entity dimensions." ); + return tnlFunctionAdapter< MeshType, Function >::template getValue( this->function, entity, time ); + } + + template< typename EntityType > + __cuda_callable__ + IndexType getLinearSystemRowLength( const MeshType& mesh, + const IndexType& index, + const EntityType& entity ) const + { + return 1; + } + + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > + __cuda_callable__ + void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const + { + typename Matrix::MatrixRow matrixRow = matrix.getRow( entity.getIndex() ); + const IndexType& index = entity.getIndex(); + matrixRow.setElement( 0, index, 1.0 ); + b[ index ] = tnlFunctionAdapter< MeshType, Function >::getValue( this->function, entity, time ); + } - template< typename EntityType > - __cuda_callable__ - IndexType getLinearSystemRowLength( const MeshType& mesh, - const IndexType& index, - const EntityType& entity ) const; - - template< typename MatrixRow, - typename EntityType, - typename MeshFunction > - __cuda_callable__ - void updateLinearSystem( const RealType& time, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction& u, - DofVectorType& b, - MatrixRow& matrixRow ) const; protected: - Function function; + Function function; //static_assert( Device::DeviceType == Function::Device::DeviceType ); }; @@ -99,6 +129,4 @@ ostream& operator << ( ostream& str, const tnlDirichletBoundaryConditions< Mesh, return str; } -#include <operators/tnlDirichletBoundaryConditions_impl.h> - #endif /* TNLDIRICHLETBOUNDARYCONDITIONS_H_ */ diff --git a/src/operators/tnlDirichletBoundaryConditions_impl.h b/src/operators/tnlDirichletBoundaryConditions_impl.h deleted file mode 100644 index f78db6237a04fe3ac07afa66497c15237dc9a947..0000000000000000000000000000000000000000 --- a/src/operators/tnlDirichletBoundaryConditions_impl.h +++ /dev/null @@ -1,144 +0,0 @@ -/*************************************************************************** - tnlDirichletBoundaryConditions_impl.h - description - ------------------- - begin : Nov 17, 2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLDIRICHLETBOUNDARYCONDITIONS_IMPL_H_ -#define TNLDIRICHLETBOUNDARYCONDITIONS_IMPL_H_ - -#include <functions/tnlFunctionAdapter.h> - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > -void -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -configSetup( tnlConfigDescription& config, - const tnlString& prefix ) -{ - Function::configSetup( config, prefix ); -} - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > -bool -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -setup( const tnlParameterContainer& parameters, - const tnlString& prefix ) -{ - return this->function.setup( parameters, prefix ); -} - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > -void -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -setFunction( const Function& function ) -{ - this->function = function; -} - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > -Function& -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -getFunction() -{ - return this->function; -} - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > -const Function& -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -getFunction() const -{ - return *this->function; -} - - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > -template< typename EntityType, - typename MeshFunction > -__cuda_callable__ -const Real -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -operator()( const MeshFunction& u, - const EntityType& entity, - const RealType& time ) const -{ - //static_assert( EntityType::getDimensions() == MeshEntitiesDimensions, "Wrong mesh entity dimensions." ); - return tnlFunctionAdapter< MeshType, Function >::template getValue( this->function, entity, time ); -} - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > - template< typename EntityType > -__cuda_callable__ -Index -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -getLinearSystemRowLength( const MeshType& mesh, - const IndexType& index, - const EntityType& entity ) const -{ - return 1; -} - -template< typename Mesh, - typename Function, - int MeshEntitiesDimensions, - typename Real, - typename Index > - template< typename Matrix, - typename EntityType, - typename MeshFunction > -__cuda_callable__ -void -tnlDirichletBoundaryConditions< Mesh, Function, MeshEntitiesDimensions, Real, Index >:: -updateLinearSystem( const RealType& time, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction& u, - DofVectorType& b, - Matrix& matrix ) const -{ - typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); - matrixRow.setElement( 0, index, 1.0 ); - b[ index ] = tnlFunctionAdapter< MeshType, Function >::getValue( this->function, entity, time ); -} - -#endif /* TNLDIRICHLETBOUNDARYCONDITIONS_IMPL_H_ */ diff --git a/src/operators/tnlNeumannBoundaryConditions.h b/src/operators/tnlNeumannBoundaryConditions.h index 7f6e3e15364a1eaf92196e158fd48a8bdd189553..dabef95f9893e768b75ac842b74a63f402d30be2 100644 --- a/src/operators/tnlNeumannBoundaryConditions.h +++ b/src/operators/tnlNeumannBoundaryConditions.h @@ -1,6 +1,8 @@ #ifndef TNLNEUMANNBOUNDARYCONDITIONS_H #define TNLNEUMANNBOUNDARYCONDITIONS_H +#include <functions/tnlFunctionAdapter.h> + template< typename Mesh, typename Function, typename Real = typename Mesh::RealType, @@ -74,26 +76,60 @@ class tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, F __cuda_callable__ const RealType operator()( const MeshFunction& u, const EntityType& entity, - const RealType& time = 0 ) const; + const RealType& time = 0 ) const + { + const MeshType& mesh = entity.getMesh(); + const auto& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); + if( entity.getCoordinates().x() == 0 ) + return u[ neighbourEntities.template getEntityIndex< 1 >() ] - entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + else + return u[ neighbourEntities.template getEntityIndex< -1 >() ] + entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + + } template< typename EntityType > __cuda_callable__ Index getLinearSystemRowLength( const MeshType& mesh, const IndexType& index, - const EntityType& entity ) const; - - template< typename MatrixRow, - typename EntityType, - typename MeshFunction > - __cuda_callable__ - void updateLinearSystem( const RealType& time, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction& u, - DofVectorType& b, - MatrixRow& matrixRow ) const; + const EntityType& entity ) const + { + return 2; + } + + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > + __cuda_callable__ + void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const + { + const auto& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); + typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); + if( entity.getCoordinates().x() == 0 ) + { + matrixRow.setElement( 0, index, 1.0 ); + matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1 >(), -1.0 ); + b[ index ] = - entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + else + { + matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1 >(), -1.0 ); + matrixRow.setElement( 1, index, 1.0 ); + b[ index ] = entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + } }; /**** @@ -116,43 +152,104 @@ class tnlNeumannBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, F { public: - typedef tnlGrid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - - typedef Function FunctionType; - typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; - typedef tnlStaticVector< 2, RealType > VertexType; - typedef typename MeshType::CoordinatesType CoordinatesType; - typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType; - typedef tnlNeumannBoundaryConditionsBase< Function > BaseType; - - - template< typename EntityType, - typename MeshFunction > - __cuda_callable__ - const RealType operator()( const MeshFunction& u, - const EntityType& entity, - const RealType& time = 0 ) const; - - template< typename EntityType > - __cuda_callable__ - Index getLinearSystemRowLength( const MeshType& mesh, - const IndexType& index, - const EntityType& entity ) const; + typedef tnlGrid< 2, MeshReal, Device, MeshIndex > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; - template< typename MatrixRow, - typename EntityType, - typename MeshFunction > - __cuda_callable__ - void updateLinearSystem( const RealType& time, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction& u, - DofVectorType& b, - MatrixRow& matrixRow ) const; + typedef Function FunctionType; + typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; + typedef tnlStaticVector< 2, RealType > VertexType; + typedef typename MeshType::CoordinatesType CoordinatesType; + typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType; + typedef tnlNeumannBoundaryConditionsBase< Function > BaseType; + + + template< typename EntityType, + typename MeshFunction > + __cuda_callable__ + const RealType operator()( const MeshFunction& u, + const EntityType& entity, + const RealType& time = 0 ) const + { + const MeshType& mesh = entity.getMesh(); + const auto& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); + if( entity.getCoordinates().x() == 0 ) + { + return u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] - entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 ) + { + return u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] + entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == 0 ) + { + return u[ neighbourEntities.template getEntityIndex< 0, 1 >() ] - entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 ) + { + return u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] + entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + } + + template< typename EntityType > + __cuda_callable__ + Index getLinearSystemRowLength( const MeshType& mesh, + const IndexType& index, + const EntityType& entity ) const + { + return 2; + } + + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > + __cuda_callable__ + void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const + { + const auto& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); + typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); + if( entity.getCoordinates().x() == 0 ) + { + matrixRow.setElement( 0, index, 1.0 ); + matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1, 0 >(), -1.0 ); + b[ index ] = - entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 ) + { + matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1, 0 >(), -1.0 ); + matrixRow.setElement( 1, index, 1.0 ); + b[ index ] = entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == 0 ) + { + matrixRow.setElement( 0, index, 1.0 ); + matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 1 >(), -1.0 ); + b[ index ] = - entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 ) + { + matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1 >(), -1.0 ); + matrixRow.setElement( 1, index, 1.0 ); + b[ index ] = entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + } }; /**** @@ -174,43 +271,128 @@ class tnlNeumannBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, F { public: - typedef tnlGrid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - - typedef Function FunctionType; - typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; - typedef tnlStaticVector< 3, RealType > VertexType; - typedef typename MeshType::CoordinatesType CoordinatesType; - typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType; - typedef tnlNeumannBoundaryConditionsBase< Function > BaseType; - - template< typename EntityType, - typename MeshFunction > - __cuda_callable__ - const RealType operator()( const MeshFunction& u, - const EntityType& entity, - const RealType& time = 0 ) const; - - - template< typename EntityType > - __cuda_callable__ - Index getLinearSystemRowLength( const MeshType& mesh, - const IndexType& index, - const EntityType& entity ) const; + typedef tnlGrid< 3, MeshReal, Device, MeshIndex > MeshType; + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; - template< typename MatrixRow, - typename EntityType, - typename MeshFunction > - __cuda_callable__ - void updateLinearSystem( const RealType& time, - const MeshType& mesh, - const IndexType& index, - const EntityType& entity, - const MeshFunction& u, - DofVectorType& b, - MatrixRow& matrixRow ) const; + typedef Function FunctionType; + typedef tnlVector< RealType, DeviceType, IndexType> DofVectorType; + typedef tnlStaticVector< 3, RealType > VertexType; + typedef typename MeshType::CoordinatesType CoordinatesType; + typedef tnlNeumannBoundaryConditions< MeshType, Function, Real, Index > ThisType; + typedef tnlNeumannBoundaryConditionsBase< Function > BaseType; + + template< typename EntityType, + typename MeshFunction > + __cuda_callable__ + const RealType operator()( const MeshFunction& u, + const EntityType& entity, + const RealType& time = 0 ) const + { + const MeshType& mesh = entity.getMesh(); + const auto& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); + if( entity.getCoordinates().x() == 0 ) + { + return u[ neighbourEntities.template getEntityIndex< 1, 0, 0 >() ] - entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 ) + { + return u[ neighbourEntities.template getEntityIndex< -1, 0, 0 >() ] + entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == 0 ) + { + return u[ neighbourEntities.template getEntityIndex< 0, 1, 0 >() ] - entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 ) + { + return u[ neighbourEntities.template getEntityIndex< 0, -1, 0 >() ] + entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().z() == 0 ) + { + return u[ neighbourEntities.template getEntityIndex< 0, 0, 1 >() ] - entity.getMesh().getSpaceSteps().z() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().z() == entity.getMesh().getDimensions().z() - 1 ) + { + return u[ neighbourEntities.template getEntityIndex< 0, 0, -1 >() ] + entity.getMesh().getSpaceSteps().z() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + } + + + template< typename EntityType > + __cuda_callable__ + Index getLinearSystemRowLength( const MeshType& mesh, + const IndexType& index, + const EntityType& entity ) const + { + return 2; + } + + template< typename PreimageFunction, + typename MeshEntity, + typename Matrix, + typename Vector > + __cuda_callable__ + void setMatrixElements( const PreimageFunction& u, + const MeshEntity& entity, + const RealType& time, + const RealType& tau, + Matrix& matrix, + Vector& b ) const + { + const auto& neighbourEntities = entity.getNeighbourEntities(); + const IndexType& index = entity.getIndex(); + typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); + if( entity.getCoordinates().x() == 0 ) + { + matrixRow.setElement( 0, index, 1.0 ); + matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 1, 0, 0 >(), -1.0 ); + b[ index ] = - entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().x() == entity.getMesh().getDimensions().x() - 1 ) + { + matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< -1, 0, 0 >(), -1.0 ); + matrixRow.setElement( 1, index, 1.0 ); + b[ index ] = entity.getMesh().getSpaceSteps().x() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == 0 ) + { + matrixRow.setElement( 0, index, 1.0 ); + matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 1, 0 >(), -1.0 ); + b[ index ] = - entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().y() == entity.getMesh().getDimensions().y() - 1 ) + { + matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, -1, 0 >(), -1.0 ); + matrixRow.setElement( 1, index, 1.0 ); + b[ index ] = entity.getMesh().getSpaceSteps().y() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().z() == 0 ) + { + matrixRow.setElement( 0, index, 1.0 ); + matrixRow.setElement( 1, neighbourEntities.template getEntityIndex< 0, 0, 1 >(), -1.0 ); + b[ index ] = - entity.getMesh().getSpaceSteps().z() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + if( entity.getCoordinates().z() == entity.getMesh().getDimensions().z() - 1 ) + { + matrixRow.setElement( 0, neighbourEntities.template getEntityIndex< 0, 0, -1 >(), -1.0 ); + matrixRow.setElement( 1, index, 1.0 ); + b[ index ] = entity.getMesh().getSpaceSteps().z() * + tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); + } + } }; template< typename Mesh, diff --git a/src/operators/tnlNeumannBoundaryConditions_impl.h b/src/operators/tnlNeumannBoundaryConditions_impl.h index adb01b87ee245a2978f2a230a11db09081103261..a9c39d0722183b4d0262e584bf35b8afd2611af4 100644 --- a/src/operators/tnlNeumannBoundaryConditions_impl.h +++ b/src/operators/tnlNeumannBoundaryConditions_impl.h @@ -49,7 +49,7 @@ getFunction() const /**** * 1D grid */ - +/* template< typename MeshReal, typename Device, typename MeshIndex, @@ -106,7 +106,7 @@ template< typename MeshReal, __cuda_callable__ void tnlNeumannBoundaryConditions< tnlGrid< 1, MeshReal, Device, MeshIndex >, Function, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const MeshType& mesh, const IndexType& index, const EntityType& entity, @@ -130,11 +130,12 @@ updateLinearSystem( const RealType& time, b[ index ] = mesh.getSpaceSteps().x() * tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); } -} +}*/ /**** * 2D grid */ +/* template< typename MeshReal, typename Device, typename MeshIndex, @@ -204,7 +205,7 @@ template< typename MeshReal, __cuda_callable__ void tnlNeumannBoundaryConditions< tnlGrid< 2, MeshReal, Device, MeshIndex >, Function, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const MeshType& mesh, const IndexType& index, const EntityType& entity, @@ -242,11 +243,12 @@ updateLinearSystem( const RealType& time, b[ index ] = mesh.getSpaceSteps().y() * tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); } -} +}*/ /**** * 3D grid */ +/* template< typename MeshReal, typename Device, typename MeshIndex, @@ -326,7 +328,7 @@ template< typename MeshReal, __cuda_callable__ void tnlNeumannBoundaryConditions< tnlGrid< 3, MeshReal, Device, MeshIndex >, Function, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const MeshType& mesh, const IndexType& index, const EntityType& entity, @@ -379,6 +381,7 @@ updateLinearSystem( const RealType& time, tnlFunctionAdapter< MeshType, FunctionType >::getValue( this->function, entity, time ); } } +*/ #endif /* TNLNEUMANNBOUNDARYCONDITIONS_IMPL_H */ diff --git a/src/problems/tnlHeatEquationProblem.h b/src/problems/tnlHeatEquationProblem.h index 704f25ca27fc749d388cb2d50495d576e64b0d57..c47cf4072376d4ae668670994f0c4e851ccc7341 100644 --- a/src/problems/tnlHeatEquationProblem.h +++ b/src/problems/tnlHeatEquationProblem.h @@ -29,6 +29,7 @@ #include <operators/diffusion/tnlLinearDiffusion.h> #include <matrices/tnlEllpackMatrix.h> #include <functions/tnlMeshFunction.h> +#include <core/tnlTimer.h> template< typename Mesh, typename BoundaryCondition, @@ -59,6 +60,9 @@ class tnlHeatEquationProblem : public tnlPDEProblem< Mesh, void writeProlog( tnlLogger& logger, const tnlParameterContainer& parameters ) const; + + bool writeEpilog( tnlLogger& logger ); + bool setup( const tnlParameterContainer& parameters ); @@ -108,6 +112,8 @@ class tnlHeatEquationProblem : public tnlPDEProblem< Mesh, BoundaryCondition boundaryCondition; RightHandSide rightHandSide; + + tnlTimer gpuTransferTimer; }; #include <problems/tnlHeatEquationProblem_impl.h> diff --git a/src/problems/tnlHeatEquationProblem_impl.h b/src/problems/tnlHeatEquationProblem_impl.h index ce68ff2b4cf1b40846cff2931af31f4aa38fbdc5..11f1f8569ed90539d3d5c2f8378c37d92470b97c 100644 --- a/src/problems/tnlHeatEquationProblem_impl.h +++ b/src/problems/tnlHeatEquationProblem_impl.h @@ -68,6 +68,19 @@ writeProlog( tnlLogger& logger, const tnlParameterContainer& parameters ) const { } +template< typename Mesh, + typename BoundaryCondition, + typename RightHandSide, + typename DifferentialOperator > +bool +tnlHeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: +writeEpilog( tnlLogger& logger ) +{ + logger.writeParameter< const char* >( "GPU transfer time:", "" ); + this->gpuTransferTimer.writeLog( logger, 1 ); + return true; +} + template< typename Mesh, typename BoundaryCondition, typename RightHandSide, @@ -76,6 +89,7 @@ bool tnlHeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: setup( const tnlParameterContainer& parameters ) { + this->gpuTransferTimer.reset(); if( ! this->boundaryCondition.setup( parameters, "boundary-conditions-" ) || ! this->rightHandSide.setup( parameters, "right-hand-side-" ) ) return false; @@ -207,6 +221,7 @@ getExplicitRHS( const RealType& time, this->bindDofs( mesh, uDofs ); MeshFunctionType fu( mesh, fuDofs ); tnlExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater; + explicitUpdater.setGPUTransferTimer( this->gpuTransferTimer ); explicitUpdater.template update< typename Mesh::Cell >( time, mesh, diff --git a/src/problems/tnlPDEProblem.h b/src/problems/tnlPDEProblem.h index 7be3d7f0a161a2c0a7295961c73c4d94bad6bb16..11f2a7cab0c581bb82f653fc783a5c75420a1eb2 100644 --- a/src/problems/tnlPDEProblem.h +++ b/src/problems/tnlPDEProblem.h @@ -50,6 +50,9 @@ class tnlPDEProblem : public tnlProblem< Real, Device, Index > void writeProlog( tnlLogger& logger, const tnlParameterContainer& parameters ) const; + + bool writeEpilog( tnlLogger& logger ) const; + bool setMeshDependentData( const MeshType& mesh, MeshDependentDataType& meshDependentData ); @@ -62,6 +65,11 @@ class tnlPDEProblem : public tnlProblem< Real, Device, Index > const MeshType& mesh, DofVectorType& dofs, MeshDependentDataType& meshDependentData ); + + void setExplicitBoundaryConditions( const RealType& time, + const MeshType& mesh, + DofVectorType& dofs, + MeshDependentDataType& meshDependentData ); bool postIterate( const RealType& time, const RealType& tau, diff --git a/src/problems/tnlPDEProblem_impl.h b/src/problems/tnlPDEProblem_impl.h index 9954e2b1f4294c348fd4f197f7e3dfb6e08f273d..0955ff0c3db72725098413c15b57c4ec88d29da8 100644 --- a/src/problems/tnlPDEProblem_impl.h +++ b/src/problems/tnlPDEProblem_impl.h @@ -54,6 +54,18 @@ writeProlog( tnlLogger& logger, const tnlParameterContainer& parameters ) const { } +template< typename Mesh, + typename Real, + typename Device, + typename Index > +bool +tnlPDEProblem< Mesh, Real, Device, Index >:: +writeEpilog( tnlLogger& logger ) const +{ + return true; +} + + template< typename Mesh, typename Real, typename Device, @@ -95,6 +107,20 @@ preIterate( const RealType& time, return true; } +template< typename Mesh, + typename Real, + typename Device, + typename Index > +void +tnlPDEProblem< Mesh, Real, Device, Index >:: +setExplicitBoundaryConditions( const RealType& time, + const MeshType& mesh, + DofVectorType& dofs, + MeshDependentDataType& meshDependentData ) +{ +} + + template< typename Mesh, typename Real, typename Device, diff --git a/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h b/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h index ee42d6b199160c5b1474e03ff1a9e48882ead691..09021f4d2beccdb396464c41525623d3ea135576 100644 --- a/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h +++ b/src/solvers/cfd/navier-stokes/tnlNavierStokesSolver_impl.h @@ -323,7 +323,7 @@ void tnlNavierStokesSolver< AdvectionScheme, const IndexType size = dofs_rho.getSize(); #ifdef HAVE_OPENMP - #pragma omp parallel for, if( tnlOmp::isEnabled() ) + #pragma omp parallel for, if( tnlHost::isOMPEnabled() ) #endif for( IndexType c = 0; c < size; c++ ) { @@ -447,7 +447,7 @@ void tnlNavierStokesSolver< AdvectionScheme, writePhysicalVariables( time, -4 ); #ifdef HAVE_OPENMP - #pragma omp parallel for, if( tnlOmp::isEnabled() ) + #pragma omp parallel for, if( tnlHost::isOMPEnabled() ) #endif for( IndexType j = 0; j < ySize; j ++ ) for( IndexType i = 0; i < xSize; i ++ ) diff --git a/src/solvers/linear/krylov/tnlTFQMRSolver.h b/src/solvers/linear/krylov/tnlTFQMRSolver.h index ab25f3e349e752bdac403a2323fada4fe34aa30e..32d250f52a18ecf23f961dabc04b1c5a0b89e1e4 100644 --- a/src/solvers/linear/krylov/tnlTFQMRSolver.h +++ b/src/solvers/linear/krylov/tnlTFQMRSolver.h @@ -75,7 +75,9 @@ class tnlTFQMRSolver : public tnlObject, bool setSize( IndexType size ); - tnlVector< RealType, Device, IndexType > d, r, w, u, v, r_ast, Au; + tnlVector< RealType, Device, IndexType > d, r, w, u, v, r_ast, Au, M_tmp; + + IndexType size; const MatrixType* matrix; const PreconditionerType* preconditioner; diff --git a/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h b/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h index e2f7d210d3ea93e4b257305f8079adc1c1b079a9..72dd40e313478ca3e6dcb53a8bd76fa93fb9f2ed 100644 --- a/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h +++ b/src/solvers/linear/krylov/tnlTFQMRSolver_impl.h @@ -18,17 +18,12 @@ #ifndef tnlTFQMRSolver_implH #define tnlTFQMRSolver_implH -template< typename RealType, - typename Vector > -RealType computeTFQMRNewP( Vector& p, - const Vector&r, - const RealType& beta, - const RealType& omega, - const Vector& Ap ); - template< typename Matrix, typename Preconditioner > tnlTFQMRSolver< Matrix, Preconditioner > :: tnlTFQMRSolver() +: size( 0 ), + matrix( 0 ), + preconditioner( 0 ) { } @@ -83,15 +78,29 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector& dbgFunctionName( "tnlTFQMRSolver", "Solve" ); if( ! this->setSize( matrix -> getRows() ) ) return false; - RealType tau, theta, eta, rho, alpha, w_norm; - RealType b_norm = b. lpNorm( 2.0 ); - if( b_norm == 0.0 ) - b_norm = 1.0; + RealType tau, theta, eta, rho, alpha, b_norm, w_norm; - this->matrix -> vectorProduct( x, r ); - r. addVector( b, 1.0, -1.0 ); + if( preconditioner ) { + preconditioner -> solve( b, M_tmp ); + b_norm = M_tmp. lpNorm( ( RealType ) 2.0 ); + + matrix -> vectorProduct( x, M_tmp ); + M_tmp.addVector( b, 1.0, -1.0 ); + preconditioner -> solve( M_tmp, r ); + } + else { + b_norm = b. lpNorm( 2.0 ); + matrix -> vectorProduct( x, r ); + r.addVector( b, 1.0, -1.0 ); + } w = u = r; - matrix -> vectorProduct( u, Au ); + if( preconditioner ) { + matrix -> vectorProduct( u, M_tmp ); + preconditioner -> solve( M_tmp, Au ); + } + else { + matrix -> vectorProduct( u, Au ); + } v = Au; d. setValue( 0.0 ); tau = r. lpNorm( 2.0 ); @@ -101,6 +110,9 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector& // only to avoid compiler warning; alpha is initialized inside the loop alpha = 0.0; + if( b_norm == 0.0 ) + b_norm = 1.0; + this->resetIterations(); this->setResidue( tau / b_norm ); @@ -114,7 +126,13 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector& else { // not necessary in odd iter since the previous iteration // already computed v_{m+1} = A*u_{m+1} - matrix -> vectorProduct( u, Au ); + if( preconditioner ) { + matrix -> vectorProduct( u, M_tmp ); + preconditioner -> solve( M_tmp, Au ); + } + else { + matrix -> vectorProduct( u, Au ); + } } w.addVector( Au, -alpha ); d.addVector( u, 1.0, theta * theta * eta / alpha ); @@ -137,7 +155,13 @@ bool tnlTFQMRSolver< Matrix, Preconditioner > :: solve( const Vector& b, Vector& u.addVector( w, 1.0, beta ); v.addVector( Au, beta, beta * beta ); - matrix -> vectorProduct( u, Au ); + if( preconditioner ) { + matrix -> vectorProduct( u, M_tmp ); + preconditioner -> solve( M_tmp, Au ); + } + else { + matrix -> vectorProduct( u, Au ); + } v.addVector( Au, 1.0 ); } else { @@ -165,13 +189,17 @@ template< typename Matrix, typename Preconditioner > bool tnlTFQMRSolver< Matrix, Preconditioner > :: setSize( IndexType size ) { + if( this->size == size ) + return true; + this->size = size; if( ! d. setSize( size ) || ! r. setSize( size ) || ! w. setSize( size ) || ! u. setSize( size ) || ! v. setSize( size ) || ! r_ast. setSize( size ) || - ! Au. setSize( size ) ) + ! Au. setSize( size ) || + ! M_tmp. setSize( size ) ) { cerr << "I am not able to allocate all supporting vectors for the TFQMR solver." << endl; return false; diff --git a/src/solvers/ode/tnlEulerSolver.h b/src/solvers/ode/tnlEulerSolver.h index 428bf19d550c5cb7b932c234ab1f7b9c7360b38b..13df781c2632042d34750083c4ce99e4958e30de 100644 --- a/src/solvers/ode/tnlEulerSolver.h +++ b/src/solvers/ode/tnlEulerSolver.h @@ -22,6 +22,7 @@ #include <config/tnlConfigDescription.h> #include <solvers/ode/tnlExplicitSolver.h> #include <config/tnlParameterContainer.h> +#include <core/tnlTimer.h> template< typename Problem > class tnlEulerSolver : public tnlExplicitSolver< Problem > @@ -61,7 +62,7 @@ class tnlEulerSolver : public tnlExplicitSolver< Problem > RealType cflCondition; - tnlTimerRT timer, updateTimer; + //tnlTimer timer, updateTimer; }; #include <solvers/ode/tnlEulerSolver_impl.h> diff --git a/src/solvers/ode/tnlEulerSolver_impl.h b/src/solvers/ode/tnlEulerSolver_impl.h index b682fcefe96a77eb1de9d5479a30615cdeba68db..b9284d6a609f5833c663f4eb33e0051b01aabc5c 100644 --- a/src/solvers/ode/tnlEulerSolver_impl.h +++ b/src/solvers/ode/tnlEulerSolver_impl.h @@ -32,8 +32,8 @@ template< typename Problem > tnlEulerSolver< Problem > :: tnlEulerSolver() : cflCondition( 0.0 ) { - timer.reset(); - updateTimer.reset(); + //timer.reset(); + //updateTimer.reset(); }; template< typename Problem > @@ -80,7 +80,7 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u ) /**** * First setup the supporting meshes k1...k5 and k_tmp. */ - timer.start(); + //timer.start(); if( ! k1. setLike( u ) ) { cerr << "I do not have enough memory to allocate a supporting grid for the Euler explicit solver." << endl; @@ -109,9 +109,9 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u ) /**** * Compute the RHS */ - timer.stop(); + //timer.stop(); this->problem->getExplicitRHS( time, currentTau, u, k1 ); - timer.start(); + //timer.start(); RealType lastResidue = this->getResidue(); RealType maxResidue( 0.0 ); @@ -125,9 +125,9 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u ) } } RealType newResidue( 0.0 ); - updateTimer.start(); + //updateTimer.start(); computeNewTimeLevel( u, currentTau, newResidue ); - updateTimer.stop(); + //updateTimer.stop(); this->setResidue( newResidue ); /**** @@ -156,8 +156,8 @@ bool tnlEulerSolver< Problem > :: solve( DofVectorType& u ) ( this->getConvergenceResidue() != 0.0 && this->getResidue() < this->getConvergenceResidue() ) ) { this->refreshSolverMonitor(); - std::cerr << std::endl << "RHS Timer = " << timer.getTime() << std::endl; - std::cerr << std::endl << "Update Timer = " << updateTimer.getTime() << std::endl; + //std::cerr << std::endl << "RHS Timer = " << timer.getRealTime() << std::endl; + //std::cerr << std::endl << "Update Timer = " << updateTimer.getRealTime() << std::endl; return true; } @@ -175,15 +175,15 @@ void tnlEulerSolver< Problem > :: computeNewTimeLevel( DofVectorType& u, RealType& currentResidue ) { RealType localResidue = RealType( 0.0 ); - IndexType size = k1. getSize(); + const IndexType size = k1. getSize(); RealType* _u = u. getData(); RealType* _k1 = k1. getData(); if( std::is_same< DeviceType, tnlHost >::value ) { -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:localResidue) firstprivate( _u, _k1, tau ) if( tnlOmp::isEnabled() ) -#endif +//#ifdef HAVE_OPENMP +//#pragma omp parallel for reduction(+:localResidue) firstprivate( _u, _k1, tau ) if( tnlHost::isOMPEnabled() ) +//#endif for( IndexType i = 0; i < size; i ++ ) { const RealType add = tau * _k1[ i ]; diff --git a/src/solvers/ode/tnlMersonSolver_impl.h b/src/solvers/ode/tnlMersonSolver_impl.h index 4c0fdad7e245af80f89f2206e5d5b3776df4cbc8..fee7015670cd1cfcc78f1862fe38e3e305059b59 100644 --- a/src/solvers/ode/tnlMersonSolver_impl.h +++ b/src/solvers/ode/tnlMersonSolver_impl.h @@ -274,28 +274,28 @@ void tnlMersonSolver< Problem > :: computeKFunctions( DofVectorType& u, this->problem->getExplicitRHS( time, tau, u, k1 ); #ifdef HAVE_OPENMP - #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, tau, tau_3 ) if( tnlOmp::isEnabled() ) + #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, tau, tau_3 ) if( tnlHost::isOMPEnabled() ) #endif for( IndexType i = 0; i < size; i ++ ) _kAux[ i ] = _u[ i ] + tau * ( 1.0 / 3.0 * _k1[ i ] ); this->problem->getExplicitRHS( time + tau_3, tau, kAux, k2 ); #ifdef HAVE_OPENMP - #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k2, tau, tau_3 ) if( tnlOmp::isEnabled() ) + #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k2, tau, tau_3 ) if( tnlHost::isOMPEnabled() ) #endif for( IndexType i = 0; i < size; i ++ ) _kAux[ i ] = _u[ i ] + tau * 1.0 / 6.0 * ( _k1[ i ] + _k2[ i ] ); this->problem->getExplicitRHS( time + tau_3, tau, kAux, k3 ); #ifdef HAVE_OPENMP - #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, tau, tau_3 ) if( tnlOmp::isEnabled() ) + #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, tau, tau_3 ) if( tnlHost::isOMPEnabled() ) #endif for( IndexType i = 0; i < size; i ++ ) _kAux[ i ] = _u[ i ] + tau * ( 0.125 * _k1[ i ] + 0.375 * _k3[ i ] ); this->problem->getExplicitRHS( time + 0.5 * tau, tau, kAux, k4 ); #ifdef HAVE_OPENMP - #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, _k4, tau, tau_3 ) if( tnlOmp::isEnabled() ) + #pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, _k4, tau, tau_3 ) if( tnlHost::isOMPEnabled() ) #endif for( IndexType i = 0; i < size; i ++ ) _kAux[ i ] = _u[ i ] + tau * ( 0.5 * _k1[ i ] - 1.5 * _k3[ i ] + 2.0 * _k4[ i ] ); @@ -454,7 +454,7 @@ void tnlMersonSolver< Problem > :: computeNewTimeLevel( DofVectorType& u, if( DeviceType :: getDevice() == tnlHostDevice ) { #ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:localResidue) firstprivate( size, _u, _k1, _k4, _k5, tau ) if( tnlOmp::isEnabled() ) +#pragma omp parallel for reduction(+:localResidue) firstprivate( size, _u, _k1, _k4, _k5, tau ) if( tnlHost::isOMPEnabled() ) #endif for( IndexType i = 0; i < size; i ++ ) { diff --git a/src/solvers/pde/tnlExplicitTimeStepper.h b/src/solvers/pde/tnlExplicitTimeStepper.h index 3394733c765b59eb88328f75b49e7a196819d87f..9c251443789c79ad7c453a75640df1d16a36ddab 100644 --- a/src/solvers/pde/tnlExplicitTimeStepper.h +++ b/src/solvers/pde/tnlExplicitTimeStepper.h @@ -21,7 +21,7 @@ #include <solvers/ode/tnlODESolverMonitor.h> #include <config/tnlConfigDescription.h> #include <config/tnlParameterContainer.h> -#include <core/tnlTimerRT.h> +#include <core/tnlTimer.h> #include <core/tnlLogger.h> @@ -85,7 +85,9 @@ class tnlExplicitTimeStepper MeshDependentDataType* meshDependentData; - tnlTimerRT explicitUpdaterTimer, mainTimer; + tnlTimer preIterateTimer, explicitUpdaterTimer, mainTimer, postIterateTimer; + + long long int allIterations; }; #include <solvers/pde/tnlExplicitTimeStepper_impl.h> diff --git a/src/solvers/pde/tnlExplicitTimeStepper_impl.h b/src/solvers/pde/tnlExplicitTimeStepper_impl.h index f071b3af0acf7b2ba7c543bfda8362743f6ed773..ea1fb5a436a2e6f506892e2c63ad4c38ce5afc61 100644 --- a/src/solvers/pde/tnlExplicitTimeStepper_impl.h +++ b/src/solvers/pde/tnlExplicitTimeStepper_impl.h @@ -27,7 +27,8 @@ tnlExplicitTimeStepper< Problem, OdeSolver >:: tnlExplicitTimeStepper() : odeSolver( 0 ), problem( 0 ), - timeStep( 0 ) + timeStep( 0 ), + allIterations( 0 ) { }; @@ -58,6 +59,8 @@ init( const MeshType& mesh ) { this->explicitUpdaterTimer.reset(); this->mainTimer.reset(); + this->preIterateTimer.reset(); + this->postIterateTimer.reset(); return true; } @@ -123,8 +126,12 @@ solve( const RealType& time, this->odeSolver->setMaxTau( ( stopTime - time ) / ( typename OdeSolver< Problem >::RealType ) this->odeSolver->getMinIterations() ); this->mesh = &mesh; this->meshDependentData = &meshDependentData; - return this->odeSolver->solve( dofVector ); + if( ! this->odeSolver->solve( dofVector ) ) + return false; + this->problem->setExplicitBoundaryConditions( stopTime, *( this->mesh ), dofVector, *( this->meshDependentData ) ); mainTimer.stop(); + this->allIterations += this->odeSolver->getIterations(); + return true; } template< typename Problem, @@ -136,6 +143,7 @@ getExplicitRHS( const RealType& time, DofVectorType& u, DofVectorType& fu ) { + this->preIterateTimer.start(); if( ! this->problem->preIterate( time, tau, *( this->mesh), @@ -146,9 +154,12 @@ getExplicitRHS( const RealType& time, return; //return false; // TODO: throw exception } + this->preIterateTimer.stop(); this->explicitUpdaterTimer.start(); + this->problem->setExplicitBoundaryConditions( time, *( this->mesh ), u, *( this->meshDependentData ) ); this->problem->getExplicitRHS( time, tau, *( this->mesh ), u, fu, *( this->meshDependentData ) ); this->explicitUpdaterTimer.stop(); + this->postIterateTimer.start(); if( ! this->problem->postIterate( time, tau, *( this->mesh ), @@ -159,6 +170,7 @@ getExplicitRHS( const RealType& time, return; //return false; // TODO: throw exception } + this->postIterateTimer.stop(); } template< typename Problem, @@ -167,8 +179,15 @@ bool tnlExplicitTimeStepper< Problem, OdeSolver >:: writeEpilog( tnlLogger& logger ) { - logger.writeParameter< double >( "Explicit update computation time:", this->explicitUpdaterTimer.getTime() ); - logger.writeParameter< double >( "Explicit time stepper time:", this->mainTimer.getTime() ); + logger.writeParameter< long long int >( "Iterations count:", this->allIterations ); + logger.writeParameter< const char* >( "Pre-iterate time:", "" ); + this->preIterateTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Explicit update computation:", "" ); + this->explicitUpdaterTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Explicit time stepper time:", "" ); + this->mainTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Post-iterate time:", "" ); + this->postIterateTimer.writeLog( logger, 1 ); return true; } diff --git a/src/solvers/pde/tnlExplicitUpdater.h b/src/solvers/pde/tnlExplicitUpdater.h index 6956c8b29e9a200d054677f7ec31e737c78fe66b..597222b741bcd0c8aad030559af109d17a4d563c 100644 --- a/src/solvers/pde/tnlExplicitUpdater.h +++ b/src/solvers/pde/tnlExplicitUpdater.h @@ -19,6 +19,7 @@ #define TNLEXPLICITUPDATER_H_ #include <functions/tnlFunctionAdapter.h> +#include <core/tnlTimer.h> template< typename Real, typename MeshFunction, @@ -51,7 +52,8 @@ class tnlExplicitUpdaterTraverserUserData rightHandSide( &rightHandSide ), u( &u ), fu( &fu ) - {}; + { + }; }; @@ -72,6 +74,14 @@ class tnlExplicitUpdater DifferentialOperator, BoundaryConditions, RightHandSide > TraverserUserData; + + tnlExplicitUpdater() + : gpuTransferTimer( 0 ){} + + void setGPUTransferTimer( tnlTimer& timer ) + { + this->gpuTransferTimer = &timer; + } template< typename EntityType > void update( const RealType& time, @@ -126,6 +136,10 @@ class tnlExplicitUpdater *userData.time ); } }; + + protected: + + tnlTimer* gpuTransferTimer; }; #include <solvers/pde/tnlExplicitUpdater_impl.h> diff --git a/src/solvers/pde/tnlExplicitUpdater_impl.h b/src/solvers/pde/tnlExplicitUpdater_impl.h index 16a2c0346402907c21701d280609deddd27a3cad..a2c3ff1ce79b1547716f08b5ca1c69d5f011f5ab 100644 --- a/src/solvers/pde/tnlExplicitUpdater_impl.h +++ b/src/solvers/pde/tnlExplicitUpdater_impl.h @@ -23,6 +23,8 @@ #include <mesh/grids/tnlTraverser_Grid2D.h> #include <mesh/grids/tnlTraverser_Grid3D.h> +#include "tnlExplicitUpdater.h" + template< typename Mesh, typename MeshFunction, typename DifferentialOperator, @@ -60,12 +62,17 @@ update( const RealType& time, } if( std::is_same< DeviceType, tnlCuda >::value ) { + if( this->gpuTransferTimer ) + this->gpuTransferTimer->start(); RealType* kernelTime = tnlCuda::passToDevice( time ); DifferentialOperator* kernelDifferentialOperator = tnlCuda::passToDevice( differentialOperator ); BoundaryConditions* kernelBoundaryConditions = tnlCuda::passToDevice( boundaryConditions ); RightHandSide* kernelRightHandSide = tnlCuda::passToDevice( rightHandSide ); MeshFunction* kernelU = tnlCuda::passToDevice( u ); MeshFunction* kernelFu = tnlCuda::passToDevice( fu ); + if( this->gpuTransferTimer ) + this->gpuTransferTimer->stop(); + TraverserUserData userData( *kernelTime, *kernelDifferentialOperator, *kernelBoundaryConditions, *kernelRightHandSide, *kernelU, *kernelFu ); checkCudaDevice; tnlTraverser< MeshType, EntityType > meshTraverser; @@ -78,7 +85,10 @@ update( const RealType& time, ( mesh, userData ); - checkCudaDevice; + if( this->gpuTransferTimer ) + this->gpuTransferTimer->start(); + + checkCudaDevice; tnlCuda::freeFromDevice( kernelTime ); tnlCuda::freeFromDevice( kernelDifferentialOperator ); tnlCuda::freeFromDevice( kernelBoundaryConditions ); @@ -86,6 +96,10 @@ update( const RealType& time, tnlCuda::freeFromDevice( kernelU ); tnlCuda::freeFromDevice( kernelFu ); checkCudaDevice; + + if( this->gpuTransferTimer ) + this->gpuTransferTimer->stop(); + } } diff --git a/src/solvers/pde/tnlLinearSystemAssembler.h b/src/solvers/pde/tnlLinearSystemAssembler.h index 14753fb7877ae1f1126b541345099a1da28a916f..74b7b6e44056915b0b9fbee59f1cd56d132b74f6 100644 --- a/src/solvers/pde/tnlLinearSystemAssembler.h +++ b/src/solvers/pde/tnlLinearSystemAssembler.h @@ -121,14 +121,13 @@ class tnlLinearSystemAssembler const EntityType& entity ) { ( *userData.b )[ entity.getIndex() ] = 0.0; - userData.boundaryConditions->updateLinearSystem - ( *userData.time + *userData.tau, - mesh, - entity.getIndex(), + userData.boundaryConditions->setMatrixElements + ( *userData.u, entity, - *userData.u, - *userData.b, - *userData.matrix ); + *userData.time + *userData.tau, + *userData.tau, + *userData.matrix, + *userData.b ); } }; @@ -143,15 +142,13 @@ class tnlLinearSystemAssembler const EntityType& entity ) { ( *userData.b )[ entity.getIndex() ] = 0.0; - userData.differentialOperator->updateLinearSystem - ( *userData.time, - *userData.tau, - mesh, - entity.getIndex(), + userData.differentialOperator->setMatrixElements + ( *userData.u, entity, - *userData.u, - *userData.b, - *userData.matrix ); + *userData.time + *userData.tau, + *userData.tau, + *userData.matrix, + *userData.b ); typedef tnlFunctionAdapter< MeshType, RightHandSide > RhsFunctionAdapter; typedef tnlFunctionAdapter< MeshType, MeshFunction > MeshFunctionAdapter; diff --git a/src/solvers/pde/tnlPDESolver.h b/src/solvers/pde/tnlPDESolver.h index 87c1f18e5b36a0814455703c9d689a54004f043b..03df9d6e00b961f5e424a5d8b3a633da8db781cc 100644 --- a/src/solvers/pde/tnlPDESolver.h +++ b/src/solvers/pde/tnlPDESolver.h @@ -21,7 +21,6 @@ #include <core/tnlObject.h> #include <config/tnlConfigDescription.h> #include <config/tnlParameterContainer.h> -#include <solvers/tnlSolverMonitor.h> #include <core/tnlLogger.h> template< typename Problem, @@ -73,13 +72,9 @@ class tnlPDESolver : public tnlObject const RealType& getSnapshotPeriod() const; - void setIoRtTimer( tnlTimerRT& ioRtTimer); + void setIoTimer( tnlTimer& ioTimer); - void setComputeRtTimer( tnlTimerRT& computeRtTimer ); - - void setIoCpuTimer( tnlTimerCPU& ioCpuTimer ); - - void setComputeCpuTimer( tnlTimerCPU& computeCpuTimer ); + void setComputeTimer( tnlTimer& computeTimer ); bool solve(); @@ -99,10 +94,7 @@ class tnlPDESolver : public tnlObject ProblemType* problem; - tnlTimerRT *ioRtTimer, *computeRtTimer; - - tnlTimerCPU *ioCpuTimer, *computeCpuTimer; - + tnlTimer *ioTimer, *computeTimer; }; #include <solvers/pde/tnlPDESolver_impl.h> diff --git a/src/solvers/pde/tnlPDESolver_impl.h b/src/solvers/pde/tnlPDESolver_impl.h index f7df40fc84992153b341a38e9f2aad515b2c0484..b0356073984a277c1af4930c41dff14927e3ed27 100644 --- a/src/solvers/pde/tnlPDESolver_impl.h +++ b/src/solvers/pde/tnlPDESolver_impl.h @@ -18,6 +18,9 @@ #ifndef TNLPDESOLVER_IMPL_H_ #define TNLPDESOLVER_IMPL_H_ +#include "tnlPDESolver.h" + + template< typename Problem, typename TimeStepper > tnlPDESolver< Problem, TimeStepper >:: @@ -29,10 +32,8 @@ tnlPDESolver() timeStep( 1.0 ), timeStepOrder( 0.0 ), problem( 0 ), - ioRtTimer( 0 ), - computeRtTimer( 0 ), - ioCpuTimer( 0 ), - computeCpuTimer( 0 ) + ioTimer( 0 ), + computeTimer( 0 ) { } @@ -288,27 +289,15 @@ getTimeStepOrder() const } template< typename Problem, typename TimeStepper > -void tnlPDESolver< Problem, TimeStepper > :: setIoRtTimer( tnlTimerRT& ioRtTimer ) -{ - this->ioRtTimer = &ioRtTimer; -} - -template< typename Problem, typename TimeStepper > -void tnlPDESolver< Problem, TimeStepper > :: setComputeRtTimer( tnlTimerRT& computeRtTimer ) -{ - this->computeRtTimer = &computeRtTimer; -} - -template< typename Problem, typename TimeStepper > -void tnlPDESolver< Problem, TimeStepper > :: setIoCpuTimer( tnlTimerCPU& ioCpuTimer ) +void tnlPDESolver< Problem, TimeStepper > :: setIoTimer( tnlTimer& ioTimer ) { - this->ioCpuTimer = &ioCpuTimer; + this->ioTimer = &ioTimer; } template< typename Problem, typename TimeStepper > -void tnlPDESolver< Problem, TimeStepper > :: setComputeCpuTimer( tnlTimerCPU& computeCpuTimer ) +void tnlPDESolver< Problem, TimeStepper > :: setComputeTimer( tnlTimer& computeTimer ) { - this->computeCpuTimer = & computeCpuTimer; + this->computeTimer = &computeTimer; } template< typename Problem, typename TimeStepper > @@ -330,23 +319,17 @@ solve() IndexType step( 0 ); IndexType allSteps = ceil( ( this->finalTime - this->initialTime ) / this->snapshotPeriod ); - this->ioRtTimer->reset(); - this->ioCpuTimer->reset(); - this->computeRtTimer->reset(); - this->computeCpuTimer->reset(); + this->ioTimer->reset(); + this->computeTimer->reset(); - this->ioRtTimer->start(); - this->ioCpuTimer->start(); + this->ioTimer->start(); if( ! this->problem->makeSnapshot( t, step, mesh, this->dofs, this->meshDependentData ) ) { cerr << "Making the snapshot failed." << endl; return false; - } - - this->ioRtTimer->stop(); - this->ioCpuTimer->stop(); - this->computeRtTimer->start(); - this->computeCpuTimer->start(); + } + this->ioTimer->stop(); + this->computeTimer->start(); /**** * Initialize the time stepper @@ -363,22 +346,17 @@ solve() step ++; t += tau; - this->ioRtTimer->start(); - this->ioCpuTimer->start(); - this->computeRtTimer->stop(); - this->computeCpuTimer->stop(); - + this->ioTimer->start(); + this->computeTimer->stop(); if( ! this->problem->makeSnapshot( t, step, mesh, this->dofs, this->meshDependentData ) ) { cerr << "Making the snapshot failed." << endl; return false; } - - this->ioRtTimer->stop(); - this->ioCpuTimer->stop(); - this->computeRtTimer->start(); - this->computeCpuTimer->start(); + this->ioTimer->stop(); + this->computeTimer->start(); } + this->computeTimer->stop(); return true; } @@ -387,7 +365,8 @@ bool tnlPDESolver< Problem, TimeStepper >:: writeEpilog( tnlLogger& logger ) const { - return this->timeStepper->writeEpilog( logger ); + return ( this->timeStepper->writeEpilog( logger ) && + this->problem->writeEpilog( logger ) ); } #endif /* TNLPDESOLVER_IMPL_H_ */ diff --git a/src/solvers/pde/tnlSemiImplicitTimeStepper.h b/src/solvers/pde/tnlSemiImplicitTimeStepper.h index 05f16704e760f44ab54d433211a56ddaa5a29623..5ce8dbf48b38f2db3d5992510a777d3072fe6235 100644 --- a/src/solvers/pde/tnlSemiImplicitTimeStepper.h +++ b/src/solvers/pde/tnlSemiImplicitTimeStepper.h @@ -80,9 +80,11 @@ class tnlSemiImplicitTimeStepper RealType timeStep; - tnlTimerRT preIterateTimer, linearSystemAssemblerTimer, linearSystemSolverTimer, postIterateTimer; + tnlTimer preIterateTimer, linearSystemAssemblerTimer, linearSystemSolverTimer, postIterateTimer; bool verbose; + + long long int allIterations; }; #include <solvers/pde/tnlSemiImplicitTimeStepper_impl.h> diff --git a/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h b/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h index 4a873b67d3ae7f5eea6aceb7571e009b17de3fc7..34dede52de49d918cc624efef53cceb9245048d6 100644 --- a/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h +++ b/src/solvers/pde/tnlSemiImplicitTimeStepper_impl.h @@ -20,13 +20,16 @@ #include <core/mfuncs.h> +#include "tnlSemiImplicitTimeStepper.h" + template< typename Problem, typename LinearSystemSolver > tnlSemiImplicitTimeStepper< Problem, LinearSystemSolver >:: tnlSemiImplicitTimeStepper() : problem( 0 ), linearSystemSolver( 0 ), - timeStep( 0 ) + timeStep( 0 ), + allIterations( 0 ) { }; @@ -75,6 +78,7 @@ init( const MeshType& mesh ) return false; this->linearSystemAssemblerTimer.reset(); this->linearSystemSolverTimer.reset(); + this->allIterations = 0; return true; } @@ -187,6 +191,7 @@ solve( const RealType& time, return false; } this->linearSystemSolverTimer.stop(); + this->allIterations += this->linearSystemSolver->getIterations(); //if( verbose ) // cout << endl; @@ -214,10 +219,15 @@ bool tnlSemiImplicitTimeStepper< Problem, LinearSystemSolver >:: writeEpilog( tnlLogger& logger ) { - logger.writeParameter< double >( "Pre-iterate time:", this->preIterateTimer.getTime() ); - logger.writeParameter< double >( "Linear system assembler time:", this->linearSystemAssemblerTimer.getTime() ); - logger.writeParameter< double >( "Linear system solver time:", this->linearSystemSolverTimer.getTime() ); - logger.writeParameter< double >( "Post-iterate time:", this->postIterateTimer.getTime() ); + logger.writeParameter< long long int >( "Iterations count:", this->allIterations ); + logger.writeParameter< const char* >( "Pre-iterate time:", "" ); + this->preIterateTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Linear system assembler time:", "" ); + this->linearSystemAssemblerTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Linear system solver time:", "" ); + this->linearSystemSolverTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Post-iterate time:", "" ); + this->postIterateTimer.writeLog( logger, 1 ); return true; } diff --git a/src/solvers/tnlIterativeSolver_impl.h b/src/solvers/tnlIterativeSolver_impl.h index 5841ec5b3cda1417c9e341032a26697ee0c10140..281948456788f74507f9e475654d961f7be242a0 100644 --- a/src/solvers/tnlIterativeSolver_impl.h +++ b/src/solvers/tnlIterativeSolver_impl.h @@ -38,7 +38,7 @@ template< typename Real, typename Index > void tnlIterativeSolver< Real, Index> :: configSetup( tnlConfigDescription& config, const tnlString& prefix ) { - config.addEntry< int > ( prefix + "max-iterations", "Maximal number of iterations the solver may perform.", 100000 ); + config.addEntry< int > ( prefix + "max-iterations", "Maximal number of iterations the solver may perform.", 100000000000 ); config.addEntry< int > ( prefix + "min-iterations", "Minimal number of iterations the solver must perform.", 0 ); config.addEntry< double >( prefix + "convergence-residue", "Convergence occurs when the residue drops bellow this limit.", 1.0e-6 ); config.addEntry< double >( prefix + "divergence-residue", "Divergence occurs when the residue exceeds given limit.", DBL_MAX ); diff --git a/src/solvers/tnlSolverConfig_impl.h b/src/solvers/tnlSolverConfig_impl.h index 01d005335aac5b531af61f05bb0768317688d382..fcf4221c421827eb730f17ffaa71f547e17ac7f9 100644 --- a/src/solvers/tnlSolverConfig_impl.h +++ b/src/solvers/tnlSolverConfig_impl.h @@ -156,7 +156,7 @@ bool tnlSolverConfig< ConfigTag, ProblemConfig >::configSetup( tnlConfigDescript config.addDelimiter( " === Logs and messages ===" ); config.addEntry< int >( "verbose", "Set the verbose mode. The higher number the more messages are generated.", 1 ); - config.addEntry< tnlString >( "log-file", "Log file for the computation." ); + config.addEntry< tnlString >( "log-file", "Log file for the computation.", "log.txt" ); config.addEntry< int >( "log-width", "Number of columns of the log table.", 80 ); return true; diff --git a/src/solvers/tnlSolverStarter.h b/src/solvers/tnlSolverStarter.h index 7b9bc2d8f3757487ac9f479ac274ab18ddef4ddd..87c67ac8d72ea3c6fd7575b4c0e63375558001be 100644 --- a/src/solvers/tnlSolverStarter.h +++ b/src/solvers/tnlSolverStarter.h @@ -19,8 +19,7 @@ #define TNLSOLVERSTARTER_H_ #include <config/tnlParameterContainer.h> -#include <core/tnlTimerRT.h> -#include <core/tnlTimerCPU.h> +#include <core/tnlTimer.h> #include <ostream> template< typename MeshConfig > @@ -45,9 +44,7 @@ class tnlSolverStarter int logWidth; - tnlTimerRT ioRtTimer, computeRtTimer, totalRtTimer; - - tnlTimerCPU ioCpuTimer, computeCpuTimer, totalCpuTimer; + tnlTimer ioTimer, computeTimer, totalTimer; }; #include <solvers/tnlSolverStarter_impl.h> diff --git a/src/solvers/tnlSolverStarter_impl.h b/src/solvers/tnlSolverStarter_impl.h index 3f9e5fcc0201adb7d994497a48a9868897ac2f94..2126dec4cbc478522f809eca8530566ba11cccfd 100644 --- a/src/solvers/tnlSolverStarter_impl.h +++ b/src/solvers/tnlSolverStarter_impl.h @@ -21,7 +21,6 @@ #include <tnlConfig.h> #include <core/tnlLogger.h> #include <core/tnlString.h> -#include <core/tnlOmp.h> #include <core/tnlCuda.h> #include <solvers/ode/tnlMersonSolver.h> #include <solvers/ode/tnlEulerSolver.h> @@ -92,7 +91,7 @@ bool tnlSolverStarter< ConfigTag > :: run( const tnlParameterContainer& paramete /**** * Create and set-up the problem */ - if( ! tnlOmp::setup( parameters ) || + if( ! tnlHost::setup( parameters ) || ! tnlCuda::setup( parameters ) ) return false; Problem problem; @@ -352,11 +351,14 @@ class tnlSolverStarterExplicitTimeStepperSetter typedef typename Problem::IndexType IndexType; typedef tnlODESolverMonitor< RealType, IndexType > SolverMonitorType; + const int verbose = parameters.getParameter< int >( "verbose" ); + ExplicitSolver explicitSolver; explicitSolver.setup( parameters ); - int verbose = parameters.getParameter< int >( "verbose" ); explicitSolver.setVerbose( verbose ); + SolverMonitorType odeSolverMonitor; + odeSolverMonitor.setVerbose( verbose ); if( ! problem.getSolverMonitor() ) explicitSolver.setSolverMonitor( odeSolverMonitor ); else @@ -395,10 +397,13 @@ class tnlSolverStarterSemiImplicitTimeStepperSetter typedef typename Problem::IndexType IndexType; typedef tnlIterativeSolverMonitor< RealType, IndexType > SolverMonitorType; + const int verbose = parameters.getParameter< int >( "verbose" ); + LinearSystemSolverType linearSystemSolver; linearSystemSolver.setup( parameters ); SolverMonitorType solverMonitor; + solverMonitor.setVerbose( verbose ); if( ! problem.getSolverMonitor() ) linearSystemSolver.setSolverMonitor( solverMonitor ); else @@ -490,10 +495,8 @@ bool tnlSolverStarter< ConfigTag > :: runPDESolver( Problem& problem, const tnlParameterContainer& parameters, TimeStepper& timeStepper ) { - this->totalCpuTimer.reset(); - this->totalRtTimer.reset(); - this->totalCpuTimer.start(); - this->totalRtTimer.start(); + this->totalTimer.reset(); + this->totalTimer.start(); /**** @@ -537,16 +540,10 @@ bool tnlSolverStarter< ConfigTag > :: runPDESolver( Problem& problem, /**** * Set-up timers */ - this->computeRtTimer.reset(); - this->computeCpuTimer.reset(); - this->ioRtTimer.reset(); - this->ioRtTimer.stop(); - this->ioCpuTimer.reset(); - this->ioCpuTimer.stop(); - solver.setComputeRtTimer( this->computeRtTimer ); - solver.setComputeCpuTimer( this->computeCpuTimer ); - solver.setIoRtTimer( this->ioRtTimer ); - solver.setIoCpuTimer( this->ioCpuTimer ); + this->computeTimer.reset(); + this->ioTimer.reset(); + solver.setComputeTimer( this->computeTimer ); + solver.setIoTimer( this->ioTimer ); /**** * Start the solver @@ -574,10 +571,8 @@ bool tnlSolverStarter< ConfigTag > :: runPDESolver( Problem& problem, /**** * Stop timers */ - this->computeRtTimer.stop(); - this->computeCpuTimer.stop(); - this->totalCpuTimer.stop(); - this->totalRtTimer.stop(); + this->computeTimer.stop(); + this->totalTimer.stop(); /**** * Write an epilog @@ -610,15 +605,15 @@ bool tnlSolverStarter< ConfigTag > :: writeEpilog( ostream& str, const Solver& s logger.writeSeparator(); logger.writeCurrentTime( "Finished at:" ); if( ! solver.writeEpilog( logger ) ) - return false; - logger.writeParameter< double >( "IO Real Time:", this->ioRtTimer.getTime() ); - logger.writeParameter< double >( "IO CPU Time:", this->ioCpuTimer.getTime() ); - logger.writeParameter< double >( "Compute Real Time:", this->computeRtTimer.getTime() ); - logger.writeParameter< double >( "Compute CPU Time:", this->computeCpuTimer.getTime() ); - logger.writeParameter< double >( "Total Real Time:", this->totalRtTimer.getTime() ); - logger.writeParameter< double >( "Total CPU Time:", this->totalCpuTimer.getTime() ); + return false; + logger.writeParameter< const char* >( "Compute time:", "" ); + this->computeTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "I/O time:", "" ); + this->ioTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Total time:", "" ); + this->totalTimer.writeLog( logger, 1 ); char buf[ 256 ]; - sprintf( buf, "%f %%", 100 * ( ( double ) this->totalCpuTimer.getTime() ) / this->totalRtTimer.getTime() ); + sprintf( buf, "%f %%", 100 * ( ( double ) this->totalTimer.getCPUTime() ) / this->totalTimer.getRealTime() ); logger.writeParameter< char* >( "CPU usage:", buf ); logger.writeSeparator(); return true; diff --git a/src/solvers/tnlSolver_impl.h b/src/solvers/tnlSolver_impl.h index 4c89c6cc94952c94dd305cee20d56c4ebd628de7..58c1bce84ffbe1d8b7dc9dec946741c0221453af 100644 --- a/src/solvers/tnlSolver_impl.h +++ b/src/solvers/tnlSolver_impl.h @@ -21,7 +21,6 @@ #include <solvers/tnlSolverInitiator.h> #include <solvers/tnlSolverStarter.h> #include <solvers/tnlSolverConfig.h> -#include <core/tnlOmp.h> #include <core/tnlCuda.h> template< template< typename Real, typename Device, typename Index, typename MeshType, typename MeshConfig, typename SolverStarter > class ProblemSetter, @@ -36,7 +35,7 @@ run( int argc, char* argv[] ) ProblemConfig< MeshConfig >::configSetup( configDescription ); tnlSolverConfig< MeshConfig, ProblemConfig< MeshConfig> >::configSetup( configDescription ); configDescription.addDelimiter( "Parallelization setup:" ); - tnlOmp::configSetup( configDescription ); + tnlHost::configSetup( configDescription ); tnlCuda::configSetup( configDescription ); if( ! parseCommandLine( argc, argv, configDescription, parameters ) ) diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h index ed76f3e8b4d048fd5327392cf54a6dd8b9bc72a0..76269b7139f3c58d82721652f911297803a3ba26 100644 --- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h +++ b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace.h @@ -35,15 +35,15 @@ class BenchmarkLaplace< tnlGrid< 1,MeshReal, Device, MeshIndex >, Real, Index > const MeshEntity& entity, const RealType& time = 0.0 ) const; - __cuda_callable__ template< typename MeshEntity > + __cuda_callable__ Index getLinearSystemRowLength( const MeshType& mesh, const IndexType& index, const MeshEntity& entity ) const; template< typename MeshEntity, typename Vector, typename MatrixRow > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -77,15 +77,15 @@ class BenchmarkLaplace< tnlGrid< 2,MeshReal, Device, MeshIndex >, Real, Index > const MeshEntity& entity, const RealType& time = 0.0 ) const; - __cuda_callable__ template< typename MeshEntity > + __cuda_callable__ Index getLinearSystemRowLength( const MeshType& mesh, const IndexType& index, const MeshEntity& entity ) const; template< typename MeshEntity, typename Vector, typename MatrixRow > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -119,15 +119,15 @@ class BenchmarkLaplace< tnlGrid< 3,MeshReal, Device, MeshIndex >, Real, Index > const MeshEntity& entity, const RealType& time = 0.0 ) const; - __cuda_callable__ template< typename MeshEntity > + __cuda_callable__ Index getLinearSystemRowLength( const MeshType& mesh, const IndexType& index, const MeshEntity& entity ) const; template< typename MeshEntity, typename Vector, typename MatrixRow > __cuda_callable__ - void updateLinearSystem( const RealType& time, + void setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, diff --git a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h index 862eddc7878509a6dfde868bd31aacc0698db38f..6cec357de6f5a0a5cd5c6ae93668be721d48c559 100644 --- a/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h +++ b/tests/benchmarks/heat-equation-benchmark/BenchmarkLaplace_impl.h @@ -80,7 +80,7 @@ template< typename MeshReal, __cuda_callable__ void BenchmarkLaplace< tnlGrid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -188,7 +188,7 @@ template< typename MeshReal, __cuda_callable__ void BenchmarkLaplace< tnlGrid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, @@ -305,7 +305,7 @@ template< typename MeshReal, __cuda_callable__ void BenchmarkLaplace< tnlGrid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -updateLinearSystem( const RealType& time, +setMatrixElements( const RealType& time, const RealType& tau, const MeshType& mesh, const IndexType& index, diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h index b1300dfeca4d6d3390c485e92315f68e45672729..ba9ce459a1855d616f91fe4937a6d230662d7374 100644 --- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h +++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkRhs.h @@ -23,7 +23,7 @@ template< typename Mesh, typename Real >class HeatEquationBenchmarkRhs typedef typename MeshEntity::MeshType::VertexType VertexType; VertexType v = entity.getCenter(); return 0.0; - }; + } }; #endif /* HeatEquationBenchmarkRHS_H_ */ diff --git a/tests/benchmarks/tnl-benchmark-simple-heat-equation.h b/tests/benchmarks/tnl-benchmark-simple-heat-equation.h index 2d20cddce6144d668a933c471f8893d751e2394d..b30210815a3303fcc0206510c37a6cecdedf94af 100644 --- a/tests/benchmarks/tnl-benchmark-simple-heat-equation.h +++ b/tests/benchmarks/tnl-benchmark-simple-heat-equation.h @@ -22,6 +22,7 @@ #include <stdio.h> #include <config/tnlConfigDescription.h> #include <config/tnlParameterContainer.h> +#include <core/tnlTimer.h> #include <core/tnlTimerRT.h> #include <core/tnlCuda.h> @@ -447,8 +448,8 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters ) /**** * Initiation */ - Real* u = new Real[ gridXSize * gridYSize ]; - Real* aux = new Real[ gridXSize * gridYSize ]; + Real* __restrict__ u = new Real[ gridXSize * gridYSize ]; + Real* __restrict__ aux = new Real[ gridXSize * gridYSize ]; if( ! u || ! aux ) { cerr << "I am not able to allocate grid function for grid size " << gridXSize << "x" << gridYSize << "." << endl; @@ -484,9 +485,7 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters ) */ if( verbose ) cout << "Starting the solver main loop..." << endl; - tnlTimerRT timer, computationTimer; - computationTimer.reset(); - timer.reset(); + tnlTimer timer, computationTimer, updateTimer; timer.start(); Real time( 0.0 ); Index iteration( 0 ); @@ -531,6 +530,7 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters ) } computationTimer.stop(); + updateTimer.start(); Real absMax( 0.0 ), residue( 0.0 ); for( Index i = 0; i < dofsCount; i++ ) { @@ -540,7 +540,7 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters ) /*const Real a = fabs( aux[ i ] ); absMax = a > absMax ? a : absMax;*/ } - + updateTimer.stop(); time += currentTau; iteration++; @@ -550,8 +550,15 @@ bool solveHeatEquationHost( const tnlParameterContainer& parameters ) timer.stop(); if( verbose ) cout << endl << "Finished..." << endl; - cout << "Explicit update computation time is " << computationTimer.getTime() << " sec. i.e. " << computationTimer.getTime() / ( double ) iteration << "sec. per iteration." << endl; - cout << "Explicit time stepper time is " << timer.getTime() << " sec. i.e. " << timer.getTime() / ( double ) iteration << "sec. per iteration." << endl; + tnlLogger logger( 72, std::cout ); + logger.writeSeparator(); + logger.writeParameter< const char* >( "Compute time:", "" ); + timer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Explicit update computation:", "" ); + computationTimer.writeLog( logger, 1 ); + logger.writeParameter< const char* >( "Euler solver update:", "" ); + updateTimer.writeLog( logger, 1 ); + logger.writeSeparator(); /*** * Freeing allocated memory diff --git a/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h b/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h index 827e3c4bbc1c8e5edeb8fb9c5a1029c7700bceca..6b53f3d910a6a962d501fff696d3e3abda7b3bd6 100644 --- a/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h +++ b/tests/unit-tests/operators/diffusion/tnlOneSidedMeanCurvatureTest.h @@ -65,6 +65,8 @@ class tnlOneSidedMeanCurvatureTest this->setupMesh( meshSize ); ApproximateOperator approximateOperator( this->mesh ); ExactOperatorType exactOperator; + approximateOperator.setRegularizationEpsilon( 1.0 ); + exactOperator.setRegularizationEpsilon( 1.0 ); this->performTest( approximateOperator, exactOperator, errors, diff --git a/tools/tnl-compile.in b/tools/tnl-compile.in index b5bd16e7ae1c88cc5138239211b73170e523a2eb..3ded5560392e047176921d69b35d35a93909fcf8 100644 --- a/tools/tnl-compile.in +++ b/tools/tnl-compile.in @@ -7,7 +7,7 @@ CXX_STD_FLAGS="-std=c++11" for option in "$@" do case $option in - --cuda ) CUDA_FLAGS="-DHAVE_CUDA `tnl-cuda-arch`" ;; + --cuda ) CUDA_FLAGS="-DHAVE_CUDA --std c++11 `tnl-cuda-arch`" ;; --debug ) DEBUG_FLAGS="-g -O0" esac done