Loading src/Benchmarks/Benchmarks.h +0 −59 Original line number Diff line number Diff line Loading @@ -20,12 +20,6 @@ #include <TNL/String.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/SystemInfo.h> #include <TNL/Cuda/DeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -176,59 +170,6 @@ class Benchmark SolverMonitorType monitor; }; inline typename Logging::MetadataMap getHardwareMetadata() { const int cpu_id = 0; const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + convertToString( cacheSizes.L1instruction ) + ", " + convertToString( cacheSizes.L2 ) + ", " + convertToString( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Cuda::DeviceInfo::getActiveDevice(); const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif #ifdef HAVE_MPI int nproc = 1; // check if MPI was initialized (some benchmarks do not initialize MPI even when // they are built with HAVE_MPI and thus MPI::GetSize() cannot be used blindly) if( TNL::MPI::Initialized() ) nproc = TNL::MPI::GetSize(); #endif typename Logging::MetadataMap metadata { { "host name", SystemInfo::getHostname() }, { "architecture", SystemInfo::getArchitecture() }, { "system", SystemInfo::getSystemName() }, { "system release", SystemInfo::getSystemRelease() }, { "start time", SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", convertToString( nproc ) }, #endif { "OpenMP enabled", convertToString( Devices::Host::isOMPEnabled() ) }, { "OpenMP threads", convertToString( Devices::Host::getMaxThreadsCount() ) }, { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", convertToString( SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU threads per core", convertToString( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU max frequency (MHz)", convertToString( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", convertToString( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) }, { "GPU clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, { "GPU global memory (GB)", convertToString( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, { "GPU memory clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, { "GPU memory ECC enabled", convertToString( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL Loading src/Benchmarks/Benchmarks.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ #pragma once #include "Benchmarks.h" #include "FunctionTimer.h" #include "Utils.h" #include <iostream> #include <exception> Loading src/Benchmarks/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -11,10 +11,10 @@ add_subdirectory( Traversers ) set( headers Benchmarks.h Benchmarks.hpp FunctionTimer.h Logging.h CustomLogging.h JsonLogging.h Utils.h ) install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks ) src/Benchmarks/FunctionTimer.h→src/Benchmarks/Utils.h +150 −0 Original line number Diff line number Diff line /*************************************************************************** FunctionTimer.h - description Utils.h - description ------------------- begin : Dec 25, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. Loading @@ -14,12 +14,19 @@ #pragma once #include <tuple> #include <map> #include <TNL/Timer.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/SystemInfo.h> #include <TNL/Cuda/DeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -87,5 +94,57 @@ timeFunction( ComputeFunction compute, return std::make_tuple( loops, mean, stddev ); } inline std::map< std::string, std::string > getHardwareMetadata() { const int cpu_id = 0; const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); const std::string cacheInfo = std::to_string( cacheSizes.L1data ) + ", " + std::to_string( cacheSizes.L1instruction ) + ", " + std::to_string( cacheSizes.L2 ) + ", " + std::to_string( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Cuda::DeviceInfo::getActiveDevice(); const std::string deviceArch = std::to_string( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + std::to_string( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif #ifdef HAVE_MPI int nproc = 1; // check if MPI was initialized (some benchmarks do not initialize MPI even when // they are built with HAVE_MPI and thus MPI::GetSize() cannot be used blindly) if( TNL::MPI::Initialized() ) nproc = TNL::MPI::GetSize(); #endif std::map< std::string, std::string > metadata { { "host name", SystemInfo::getHostname() }, { "architecture", SystemInfo::getArchitecture() }, { "system", SystemInfo::getSystemName() }, { "system release", SystemInfo::getSystemRelease() }, { "start time", SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", std::to_string( nproc ) }, #endif { "OpenMP enabled", std::to_string( Devices::Host::isOMPEnabled() ) }, { "OpenMP threads", std::to_string( Devices::Host::getMaxThreadsCount() ) }, { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", std::to_string( SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU threads per core", std::to_string( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU max frequency (MHz)", std::to_string( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", std::to_string( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) }, { "GPU clock rate (MHz)", std::to_string( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, { "GPU global memory (GB)", std::to_string( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, { "GPU memory clock rate (MHz)", std::to_string( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, { "GPU memory ECC enabled", std::to_string( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL Loading
src/Benchmarks/Benchmarks.h +0 −59 Original line number Diff line number Diff line Loading @@ -20,12 +20,6 @@ #include <TNL/String.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/SystemInfo.h> #include <TNL/Cuda/DeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -176,59 +170,6 @@ class Benchmark SolverMonitorType monitor; }; inline typename Logging::MetadataMap getHardwareMetadata() { const int cpu_id = 0; const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + convertToString( cacheSizes.L1instruction ) + ", " + convertToString( cacheSizes.L2 ) + ", " + convertToString( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Cuda::DeviceInfo::getActiveDevice(); const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif #ifdef HAVE_MPI int nproc = 1; // check if MPI was initialized (some benchmarks do not initialize MPI even when // they are built with HAVE_MPI and thus MPI::GetSize() cannot be used blindly) if( TNL::MPI::Initialized() ) nproc = TNL::MPI::GetSize(); #endif typename Logging::MetadataMap metadata { { "host name", SystemInfo::getHostname() }, { "architecture", SystemInfo::getArchitecture() }, { "system", SystemInfo::getSystemName() }, { "system release", SystemInfo::getSystemRelease() }, { "start time", SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", convertToString( nproc ) }, #endif { "OpenMP enabled", convertToString( Devices::Host::isOMPEnabled() ) }, { "OpenMP threads", convertToString( Devices::Host::getMaxThreadsCount() ) }, { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", convertToString( SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU threads per core", convertToString( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU max frequency (MHz)", convertToString( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", convertToString( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) }, { "GPU clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, { "GPU global memory (GB)", convertToString( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, { "GPU memory clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, { "GPU memory ECC enabled", convertToString( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL Loading
src/Benchmarks/Benchmarks.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ #pragma once #include "Benchmarks.h" #include "FunctionTimer.h" #include "Utils.h" #include <iostream> #include <exception> Loading
src/Benchmarks/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -11,10 +11,10 @@ add_subdirectory( Traversers ) set( headers Benchmarks.h Benchmarks.hpp FunctionTimer.h Logging.h CustomLogging.h JsonLogging.h Utils.h ) install( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Benchmarks )
src/Benchmarks/FunctionTimer.h→src/Benchmarks/Utils.h +150 −0 Original line number Diff line number Diff line /*************************************************************************** FunctionTimer.h - description Utils.h - description ------------------- begin : Dec 25, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. Loading @@ -14,12 +14,19 @@ #pragma once #include <tuple> #include <map> #include <TNL/Timer.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/SystemInfo.h> #include <TNL/Cuda/DeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -87,5 +94,57 @@ timeFunction( ComputeFunction compute, return std::make_tuple( loops, mean, stddev ); } inline std::map< std::string, std::string > getHardwareMetadata() { const int cpu_id = 0; const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); const std::string cacheInfo = std::to_string( cacheSizes.L1data ) + ", " + std::to_string( cacheSizes.L1instruction ) + ", " + std::to_string( cacheSizes.L2 ) + ", " + std::to_string( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Cuda::DeviceInfo::getActiveDevice(); const std::string deviceArch = std::to_string( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + std::to_string( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif #ifdef HAVE_MPI int nproc = 1; // check if MPI was initialized (some benchmarks do not initialize MPI even when // they are built with HAVE_MPI and thus MPI::GetSize() cannot be used blindly) if( TNL::MPI::Initialized() ) nproc = TNL::MPI::GetSize(); #endif std::map< std::string, std::string > metadata { { "host name", SystemInfo::getHostname() }, { "architecture", SystemInfo::getArchitecture() }, { "system", SystemInfo::getSystemName() }, { "system release", SystemInfo::getSystemRelease() }, { "start time", SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", std::to_string( nproc ) }, #endif { "OpenMP enabled", std::to_string( Devices::Host::isOMPEnabled() ) }, { "OpenMP threads", std::to_string( Devices::Host::getMaxThreadsCount() ) }, { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", std::to_string( SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU threads per core", std::to_string( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) }, { "CPU max frequency (MHz)", std::to_string( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", std::to_string( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) }, { "GPU clock rate (MHz)", std::to_string( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, { "GPU global memory (GB)", std::to_string( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, { "GPU memory clock rate (MHz)", std::to_string( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, { "GPU memory ECC enabled", std::to_string( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL