diff --git a/src/TNL/Devices/CMakeLists.txt b/src/TNL/Devices/CMakeLists.txt index d771a6c5bad945c161bd3b7faa604e2e8b237774..b2beec09623a675a0fb9743e388dd8befb90b957 100644 --- a/src/TNL/Devices/CMakeLists.txt +++ b/src/TNL/Devices/CMakeLists.txt @@ -2,26 +2,11 @@ set (headers Cuda.h Cuda_impl.h CudaCallable.h CudaDeviceInfo.h + CudaDeviceInfo_impl.h Host.h MIC.h - SystemInfo.h ) + SystemInfo.h + SystemInfo_impl.h +) -SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Devices ) -set( common_SOURCES - ${CURRENT_DIR}/SystemInfo.cpp ) - -IF( BUILD_CUDA ) - set( tnl_devices_CUDA__SOURCES - ${common_SOURCES} - ${CURRENT_DIR}/CudaDeviceInfo.cu - PARENT_SCOPE ) -ENDIF() - -set( tnl_devices_SOURCES - ${common_SOURCES} - ${CURRENT_DIR}/CudaDeviceInfo.cpp - PARENT_SCOPE ) - -#SET( libtnlcoreincludedir ${TNL_INCLUDE_DIR}/core ) -#SET( libtnlcoreinclude_HEADERS ${headers} ) INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Devices ) diff --git a/src/TNL/Devices/CudaDeviceInfo.cpp b/src/TNL/Devices/CudaDeviceInfo.cpp deleted file mode 100644 index 85a6604d87bba8655aac46609d4ae0db8dc24934..0000000000000000000000000000000000000000 --- a/src/TNL/Devices/CudaDeviceInfo.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/*************************************************************************** - Devices::CudaDeviceInfo.cpp - description - ------------------- - begin : Jun 21, 2015 - copyright : (C) 2007 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef HAVE_CUDA - -#include <TNL/Devices/CudaDeviceInfo.h> -#include <TNL/Logger.h> - -namespace TNL { -namespace Devices { - -int -CudaDeviceInfo:: -getNumberOfDevices() -{ - return -1; -} - -int -CudaDeviceInfo:: -getActiveDevice() -{ - return -1; -} - -String -CudaDeviceInfo:: -getDeviceName( int deviceNum ) -{ - return String( "" ); -} - -int -CudaDeviceInfo:: -getArchitectureMajor( int deviceNum ) -{ - return 0; -} - -int -CudaDeviceInfo:: -getArchitectureMinor( int deviceNum ) -{ - return 0; -} - -int -CudaDeviceInfo:: -getClockRate( int deviceNum ) -{ - return 0; -} - -size_t -CudaDeviceInfo:: -getGlobalMemory( int deviceNum ) -{ - return 0; -} - -size_t -CudaDeviceInfo:: -getFreeGlobalMemory() -{ - return 0; -} - -int -CudaDeviceInfo:: -getMemoryClockRate( int deviceNum ) -{ - return 0; -} - -bool -CudaDeviceInfo:: -getECCEnabled( int deviceNum ) -{ - return 0; -} - -int -CudaDeviceInfo:: -getCudaMultiprocessors( int deviceNum ) -{ - return 0; -} - -int -CudaDeviceInfo:: -getCudaCoresPerMultiprocessors( int deviceNum ) -{ - return 0; -} - -int -CudaDeviceInfo:: -getCudaCores( int deviceNum ) -{ - return 0; -} - -int -CudaDeviceInfo:: -getRegistersPerMultiprocessor( int deviceNum ) -{ - return 0; -} - -void -CudaDeviceInfo:: -writeDeviceInfo( Logger& logger ) -{ -} - -} // namespace Devices -} // namespace TNL - -#endif diff --git a/src/TNL/Devices/CudaDeviceInfo.cu b/src/TNL/Devices/CudaDeviceInfo.cu deleted file mode 100644 index 4e66c095d6579385f668de4cf93b255b3182c87f..0000000000000000000000000000000000000000 --- a/src/TNL/Devices/CudaDeviceInfo.cu +++ /dev/null @@ -1,218 +0,0 @@ -/*************************************************************************** - CudaDeviceInfo.cu - description - ------------------- - begin : Jun 21, 2015 - copyright : (C) 2007 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifdef HAVE_CUDA - -#include <unordered_map> - -#include <TNL/Devices/CudaDeviceInfo.h> -#include <TNL/Logger.h> - -namespace TNL { -namespace Devices { - -int -CudaDeviceInfo:: -getNumberOfDevices() -{ - int devices; - cudaGetDeviceCount( &devices ); - return devices; -} - -int -CudaDeviceInfo:: -getActiveDevice() -{ - int device; - cudaGetDevice( &device ); - return device; -} - -String -CudaDeviceInfo:: -getDeviceName( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return String( properties.name ); -} - -int -CudaDeviceInfo:: -getArchitectureMajor( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return properties.major; -} - -int -CudaDeviceInfo:: -getArchitectureMinor( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return properties.minor; -} - -int -CudaDeviceInfo:: -getClockRate( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return properties.clockRate; -} - -size_t -CudaDeviceInfo:: -getGlobalMemory( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return properties.totalGlobalMem; -} - -size_t -CudaDeviceInfo:: -getFreeGlobalMemory() -{ - size_t free = 0; - size_t total = 0; - cudaMemGetInfo( &free, &total ); - return free; -} - -int -CudaDeviceInfo:: -getMemoryClockRate( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return properties.memoryClockRate; -} - -bool -CudaDeviceInfo:: -getECCEnabled( int deviceNum ) -{ - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - return properties.ECCEnabled; -} - -int -CudaDeviceInfo:: -getCudaMultiprocessors( int deviceNum ) -{ - // results are cached because they are used for configuration of some kernels - static std::unordered_map< int, int > results; - if( results.count( deviceNum ) == 0 ) { - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - results.emplace( deviceNum, properties.multiProcessorCount ); - return properties.multiProcessorCount; - } - return results[ deviceNum ]; -} - -int -CudaDeviceInfo:: -getCudaCoresPerMultiprocessors( int deviceNum ) -{ - int major = CudaDeviceInfo::getArchitectureMajor( deviceNum ); - int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum ); - switch( major ) - { - case 1: // Tesla generation, G80, G8x, G9x classes - return 8; - case 2: // Fermi generation - switch( minor ) - { - case 0: // GF100 class - return 32; - case 1: // GF10x class - return 48; - } - case 3: // Kepler generation -- GK10x, GK11x classes - return 192; - case 5: // Maxwell generation -- GM10x, GM20x classes - return 128; - case 6: // Pascal generation - switch( minor ) - { - case 0: // GP100 class - return 64; - case 1: // GP10x classes - case 2: - return 128; - } - default: - return -1; - } -} - -int -CudaDeviceInfo:: -getCudaCores( int deviceNum ) -{ - return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) * - CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum ); -} - -int -CudaDeviceInfo:: -getRegistersPerMultiprocessor( int deviceNum ) -{ - // results are cached because they are used for configuration of some kernels - static std::unordered_map< int, int > results; - if( results.count( deviceNum ) == 0 ) { - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, deviceNum ); - results.emplace( deviceNum, properties.regsPerMultiprocessor ); - return properties.regsPerMultiprocessor; - } - return results[ deviceNum ]; -} - -void -CudaDeviceInfo:: -writeDeviceInfo( Logger& logger ) -{ - logger.writeParameter< String >( "CUDA GPU info", String("") ); - // TODO: Printing all devices does not make sense until TNL can actually - // use more than one device for computations. Printing only the active - // device for now... -// int devices = getNumberOfDevices(); -// writeParameter< int >( "Number of devices", devices, 1 ); -// for( int i = 0; i < devices; i++ ) -// { -// logger.writeParameter< int >( "Device no.", i, 1 ); - int i = getActiveDevice(); - logger.writeParameter< String >( "Name", getDeviceName( i ), 2 ); - String deviceArch = convertToString( getArchitectureMajor( i ) ) + "." + - convertToString( getArchitectureMinor( i ) ); - logger.writeParameter< String >( "Architecture", deviceArch, 2 ); - logger.writeParameter< int >( "CUDA cores", getCudaCores( i ), 2 ); - double clockRate = ( double ) getClockRate( i ) / 1.0e3; - logger.writeParameter< double >( "Clock rate (in MHz)", clockRate, 2 ); - double globalMemory = ( double ) getGlobalMemory( i ) / 1.0e9; - logger.writeParameter< double >( "Global memory (in GB)", globalMemory, 2 ); - double memoryClockRate = ( double ) getMemoryClockRate( i ) / 1.0e3; - logger.writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRate, 2 ); - logger.writeParameter< bool >( "ECC enabled", getECCEnabled( i ), 2 ); -// } -} - -} // namespace Devices -} // namespace TNL - -#endif diff --git a/src/TNL/Devices/CudaDeviceInfo.h b/src/TNL/Devices/CudaDeviceInfo.h index 0b02daee53071d5899f4de14041e40f2da96adaa..9eefe3bad8932670af271204e03f72b5eb501a95 100644 --- a/src/TNL/Devices/CudaDeviceInfo.h +++ b/src/TNL/Devices/CudaDeviceInfo.h @@ -15,9 +15,6 @@ #include <TNL/String.h> namespace TNL { - -class Logger; - namespace Devices { class CudaDeviceInfo @@ -51,9 +48,9 @@ class CudaDeviceInfo static int getCudaCores( int deviceNum ); static int getRegistersPerMultiprocessor( int deviceNum ); - - static void writeDeviceInfo( Logger& logger ); }; } // namespace Devices } // namespace TNL + +#include <TNL/Devices/CudaDeviceInfo_impl.h> diff --git a/src/TNL/Devices/CudaDeviceInfo_impl.h b/src/TNL/Devices/CudaDeviceInfo_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..f29ecd8c91493edb538f11a8ced6c9ee6503983a --- /dev/null +++ b/src/TNL/Devices/CudaDeviceInfo_impl.h @@ -0,0 +1,243 @@ +/*************************************************************************** + CudaDeviceInfo_impl.h - description + ------------------- + begin : Jun 21, 2015 + copyright : (C) 2007 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <unordered_map> + +#include <TNL/Devices/CudaDeviceInfo.h> +#include <TNL/Exceptions/CudaSupportMissing.h> + +namespace TNL { +namespace Devices { + +inline int +CudaDeviceInfo:: +getNumberOfDevices() +{ +#ifdef HAVE_CUDA + int devices; + cudaGetDeviceCount( &devices ); + return devices; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getActiveDevice() +{ +#ifdef HAVE_CUDA + int device; + cudaGetDevice( &device ); + return device; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline String +CudaDeviceInfo:: +getDeviceName( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return String( properties.name ); +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getArchitectureMajor( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return properties.major; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getArchitectureMinor( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return properties.minor; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getClockRate( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return properties.clockRate; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline size_t +CudaDeviceInfo:: +getGlobalMemory( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return properties.totalGlobalMem; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline size_t +CudaDeviceInfo:: +getFreeGlobalMemory() +{ +#ifdef HAVE_CUDA + size_t free = 0; + size_t total = 0; + cudaMemGetInfo( &free, &total ); + return free; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getMemoryClockRate( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return properties.memoryClockRate; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline bool +CudaDeviceInfo:: +getECCEnabled( int deviceNum ) +{ +#ifdef HAVE_CUDA + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + return properties.ECCEnabled; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getCudaMultiprocessors( int deviceNum ) +{ +#ifdef HAVE_CUDA + // results are cached because they are used for configuration of some kernels + static std::unordered_map< int, int > results; + if( results.count( deviceNum ) == 0 ) { + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + results.emplace( deviceNum, properties.multiProcessorCount ); + return properties.multiProcessorCount; + } + return results[ deviceNum ]; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getCudaCoresPerMultiprocessors( int deviceNum ) +{ +#ifdef HAVE_CUDA + int major = CudaDeviceInfo::getArchitectureMajor( deviceNum ); + int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum ); + switch( major ) + { + case 1: // Tesla generation, G80, G8x, G9x classes + return 8; + case 2: // Fermi generation + switch( minor ) + { + case 0: // GF100 class + return 32; + case 1: // GF10x class + return 48; + } + case 3: // Kepler generation -- GK10x, GK11x classes + return 192; + case 5: // Maxwell generation -- GM10x, GM20x classes + return 128; + case 6: // Pascal generation + switch( minor ) + { + case 0: // GP100 class + return 64; + case 1: // GP10x classes + case 2: + return 128; + } + default: + return -1; + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getCudaCores( int deviceNum ) +{ +#ifdef HAVE_CUDA + return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) * + CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum ); +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +inline int +CudaDeviceInfo:: +getRegistersPerMultiprocessor( int deviceNum ) +{ +#ifdef HAVE_CUDA + // results are cached because they are used for configuration of some kernels + static std::unordered_map< int, int > results; + if( results.count( deviceNum ) == 0 ) { + cudaDeviceProp properties; + cudaGetDeviceProperties( &properties, deviceNum ); + results.emplace( deviceNum, properties.regsPerMultiprocessor ); + return properties.regsPerMultiprocessor; + } + return results[ deviceNum ]; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +} // namespace Devices +} // namespace TNL diff --git a/src/TNL/Devices/SystemInfo.h b/src/TNL/Devices/SystemInfo.h index bafcdd428264f9586f72802916bafbe26ce1eda9..f62321d6f819303ec4c12d174a71305f26792ac1 100644 --- a/src/TNL/Devices/SystemInfo.h +++ b/src/TNL/Devices/SystemInfo.h @@ -15,9 +15,6 @@ #include <TNL/String.h> namespace TNL { - -class Logger; - namespace Devices { struct CacheSizes { @@ -45,15 +42,16 @@ public: static CacheSizes getCPUCacheSizes( int cpu_id ); static size_t getFreeMemory(); - static void writeDeviceInfo( Logger& logger ); - protected: - static int numberOfProcessors; - static String CPUModelName; - static int CPUThreads; - static int CPUCores; + struct CPUInfo + { + int numberOfProcessors = 0; + String CPUModelName; + int CPUThreads = 0; + int CPUCores = 0; + }; - static void parseCPUInfo(); + static CPUInfo parseCPUInfo(); template< typename ResultType > static ResultType @@ -72,3 +70,5 @@ protected: } // namespace Devices } // namespace TNL + +#include <TNL/Devices/SystemInfo_impl.h> diff --git a/src/TNL/Devices/SystemInfo.cpp b/src/TNL/Devices/SystemInfo_impl.h similarity index 58% rename from src/TNL/Devices/SystemInfo.cpp rename to src/TNL/Devices/SystemInfo_impl.h index 9ad5e19b9f42ffb99a1c9a89d825383f96b70c94..008ec644c954d3b045302dc7f8a7529fdbcb1131 100644 --- a/src/TNL/Devices/SystemInfo.cpp +++ b/src/TNL/Devices/SystemInfo_impl.h @@ -1,5 +1,5 @@ /*************************************************************************** - SystemInfo.cpp - description + SystemInfo_impl.h - description ------------------- begin : Jul 8, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. @@ -8,6 +8,8 @@ /* See Copyright Notice in tnl/Copyright */ +#pragma once + #include <set> #include <iomanip> #include <cstring> @@ -17,17 +19,11 @@ #include <sys/stat.h> #include <TNL/Devices/SystemInfo.h> -#include <TNL/Logger.h> namespace TNL { namespace Devices { -int SystemInfo::numberOfProcessors( 0 ); -String SystemInfo::CPUModelName( "" ); -int SystemInfo::CPUThreads( 0 ); -int SystemInfo::CPUCores( 0 ); - -String +inline String SystemInfo::getHostname( void ) { char host_name[ 256 ]; @@ -35,7 +31,7 @@ SystemInfo::getHostname( void ) return String( host_name ); } -String +inline String SystemInfo::getArchitecture( void ) { utsname uts; @@ -43,7 +39,7 @@ SystemInfo::getArchitecture( void ) return String( uts.machine ); } -String +inline String SystemInfo::getSystemName( void ) { utsname uts; @@ -51,7 +47,7 @@ SystemInfo::getSystemName( void ) return String( uts.sysname ); } -String +inline String SystemInfo::getSystemRelease( void ) { utsname uts; @@ -59,7 +55,7 @@ SystemInfo::getSystemRelease( void ) return String( uts.release ); } -String +inline String SystemInfo::getCurrentTime( const char* format ) { const std::time_t time_since_epoch = std::time( nullptr ); @@ -70,46 +66,58 @@ SystemInfo::getCurrentTime( const char* format ) } -int +inline int SystemInfo::getNumberOfProcessors( void ) { - if( numberOfProcessors == 0 ) - parseCPUInfo(); + static int numberOfProcessors = 0; + if( numberOfProcessors == 0 ) { + CPUInfo info = parseCPUInfo(); + numberOfProcessors = info.numberOfProcessors; + } return numberOfProcessors; } -String +inline String SystemInfo::getOnlineCPUs( void ) { std::string online = readFile< std::string >( "/sys/devices/system/cpu/online" ); return String( online.c_str() ); } -int +inline int SystemInfo::getNumberOfCores( int cpu_id ) { - if( CPUCores == 0 ) - parseCPUInfo(); + static int CPUCores = 0; + if( CPUCores == 0 ) { + CPUInfo info = parseCPUInfo(); + CPUCores = info.CPUCores; + } return CPUCores; } -int +inline int SystemInfo::getNumberOfThreads( int cpu_id ) { - if( CPUThreads == 0 ) - parseCPUInfo(); + static int CPUThreads = 0; + if( CPUThreads == 0 ) { + CPUInfo info = parseCPUInfo(); + CPUThreads = info.CPUThreads; + } return CPUThreads; } -String +inline String SystemInfo::getCPUModelName( int cpu_id ) { - if( CPUModelName == "" ) - parseCPUInfo(); + static String CPUModelName; + if( CPUModelName == "" ) { + CPUInfo info = parseCPUInfo(); + CPUModelName = info.CPUModelName; + } return CPUModelName; } -int +inline int SystemInfo::getCPUMaxFrequency( int cpu_id ) { String fileName( "/sys/devices/system/cpu/cpu" ); @@ -117,7 +125,7 @@ SystemInfo::getCPUMaxFrequency( int cpu_id ) return readFile< int >( fileName ); } -CacheSizes +inline CacheSizes SystemInfo::getCPUCacheSizes( int cpu_id ) { String directory( "/sys/devices/system/cpu/cpu" ); @@ -148,59 +156,23 @@ SystemInfo::getCPUCacheSizes( int cpu_id ) return sizes; } -void -SystemInfo:: -writeDeviceInfo( Logger& logger ) +inline size_t +SystemInfo::getFreeMemory() { -// compiler detection macros: -// http://nadeausoftware.com/articles/2012/10/c_c_tip_how_detect_compiler_name_and_version_using_compiler_predefined_macros -// https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compilation-phases -#if defined(__NVCC__) - #define TNL_STRINGIFY(x) #x - const char* compiler_name = "Nvidia NVCC (" TNL_STRINGIFY(__CUDACC_VER_MAJOR__) "." TNL_STRINGIFY(__CUDACC_VER_MINOR__) "." TNL_STRINGIFY(__CUDACC_VER_BUILD__) ")"; - #undef TNL_STRINGIFY -#elif defined(__clang__) - const char* compiler_name = "Clang/LLVM (" __VERSION__ ")"; -#elif defined(__ICC) || defined(__INTEL_COMPILER) - const char* compiler_name = "Intel ICPC (" __VERSION__ ")"; -#elif defined(__GNUC__) || defined(__GNUG__) - const char* compiler_name = "GNU G++ (" __VERSION__ ")"; -#else - const char* compiler_name = "(unknown)"; -#endif - - logger.writeParameter< String >( "Host name:", getHostname() ); - logger.writeParameter< String >( "System:", getSystemName() ); - logger.writeParameter< String >( "Release:", getSystemRelease() ); - logger.writeParameter< String >( "Architecture:", getArchitecture() ); - logger.writeParameter< String >( "TNL compiler:", compiler_name ); - // FIXME: generalize for multi-socket systems, here we consider only the first found CPU - const int cpu_id = 0; - const int threads = getNumberOfThreads( cpu_id ); - const int cores = getNumberOfCores( cpu_id ); - int threadsPerCore = 0; - if( cores > 0 ) - threadsPerCore = threads / cores; - logger.writeParameter< String >( "CPU info", String("") ); - logger.writeParameter< String >( "Model name:", getCPUModelName( cpu_id ), 1 ); - logger.writeParameter< int >( "Cores:", cores, 1 ); - logger.writeParameter< int >( "Threads per core:", threadsPerCore, 1 ); - logger.writeParameter< float >( "Max clock rate (in MHz):", getCPUMaxFrequency( cpu_id ) / 1000, 1 ); - CacheSizes cacheSizes = getCPUCacheSizes( cpu_id ); - String cacheInfo = convertToString( cacheSizes.L1data ) + ", " - + convertToString( cacheSizes.L1instruction ) + ", " - + convertToString( cacheSizes.L2 ) + ", " - + convertToString( cacheSizes.L3 ); - logger.writeParameter< String >( "Cache (L1d, L1i, L2, L3):", cacheInfo, 1 ); + long pages = sysconf(_SC_PHYS_PAGES); + long page_size = sysconf(_SC_PAGE_SIZE); + return pages * page_size; } -void + +inline SystemInfo::CPUInfo SystemInfo::parseCPUInfo( void ) { + CPUInfo info; std::ifstream file( "/proc/cpuinfo" ); if( ! file ) { std::cerr << "Unable to read information from /proc/cpuinfo." << std::endl; - return; + return info; } char line[ 1024 ]; @@ -221,32 +193,26 @@ SystemInfo::parseCPUInfo( void ) { i = strlen( "model name" ); while( line[ i ] != ':' && line[ i ] ) i ++; - CPUModelName.setString( &line[ i + 1 ] ); + info.CPUModelName.setString( &line[ i + 1 ] ); continue; } if( strncmp( line, "cpu cores", strlen( "cpu cores" ) ) == 0 ) { i = strlen( "cpu MHz" ); while( line[ i ] != ':' && line[ i ] ) i ++; - CPUCores = atoi( &line[ i + 1 ] ); + info.CPUCores = atoi( &line[ i + 1 ] ); continue; } if( strncmp( line, "siblings", strlen( "siblings" ) ) == 0 ) { i = strlen( "siblings" ); while( line[ i ] != ':' && line[ i ] ) i ++; - CPUThreads = atoi( &line[ i + 1 ] ); + info.CPUThreads = atoi( &line[ i + 1 ] ); } } - numberOfProcessors = processors.size(); -} - + info.numberOfProcessors = processors.size(); -size_t SystemInfo::getFreeMemory() -{ - long pages = sysconf(_SC_PHYS_PAGES); - long page_size = sysconf(_SC_PAGE_SIZE); - return pages * page_size; + return info; } } // namespace Devices diff --git a/src/TNL/Logger_impl.h b/src/TNL/Logger_impl.h index 4a42e55dce8bd2ca3379bd4af455ac3a465b3e77..0e1dd8dc62434faf07b64a95f1f896ed9b8af940 100644 --- a/src/TNL/Logger_impl.h +++ b/src/TNL/Logger_impl.h @@ -44,9 +44,72 @@ Logger::writeSeparator() inline bool Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) { - Devices::SystemInfo::writeDeviceInfo( *this ); - if( parameters.getParameter< String >( "device" ) == "cuda" ) - Devices::CudaDeviceInfo::writeDeviceInfo( *this ); +// compiler detection macros: +// http://nadeausoftware.com/articles/2012/10/c_c_tip_how_detect_compiler_name_and_version_using_compiler_predefined_macros +// https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compilation-phases +#if defined(__NVCC__) + #define TNL_STRINGIFY(x) #x + const char* compiler_name = "Nvidia NVCC (" TNL_STRINGIFY(__CUDACC_VER_MAJOR__) "." TNL_STRINGIFY(__CUDACC_VER_MINOR__) "." TNL_STRINGIFY(__CUDACC_VER_BUILD__) ")"; + #undef TNL_STRINGIFY +#elif defined(__clang__) + const char* compiler_name = "Clang/LLVM (" __VERSION__ ")"; +#elif defined(__ICC) || defined(__INTEL_COMPILER) + const char* compiler_name = "Intel ICPC (" __VERSION__ ")"; +#elif defined(__GNUC__) || defined(__GNUG__) + const char* compiler_name = "GNU G++ (" __VERSION__ ")"; +#else + const char* compiler_name = "(unknown)"; +#endif + + writeParameter< String >( "Host name:", Devices::SystemInfo::getHostname() ); + writeParameter< String >( "System:", Devices::SystemInfo::getSystemName() ); + writeParameter< String >( "Release:", Devices::SystemInfo::getSystemRelease() ); + writeParameter< String >( "Architecture:", Devices::SystemInfo::getArchitecture() ); + writeParameter< String >( "TNL compiler:", compiler_name ); + // FIXME: generalize for multi-socket systems, here we consider only the first found CPU + const int cpu_id = 0; + const int threads = Devices::SystemInfo::getNumberOfThreads( cpu_id ); + const int cores = Devices::SystemInfo::getNumberOfCores( cpu_id ); + int threadsPerCore = 0; + if( cores > 0 ) + threadsPerCore = threads / cores; + writeParameter< String >( "CPU info", "" ); + writeParameter< String >( "Model name:", Devices::SystemInfo::getCPUModelName( cpu_id ), 1 ); + writeParameter< int >( "Cores:", cores, 1 ); + writeParameter< int >( "Threads per core:", threadsPerCore, 1 ); + writeParameter< double >( "Max clock rate (in MHz):", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 ); + const Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); + const String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + + convertToString( cacheSizes.L1instruction ) + ", " + + convertToString( cacheSizes.L2 ) + ", " + + convertToString( cacheSizes.L3 ); + writeParameter< String >( "Cache (L1d, L1i, L2, L3):", cacheInfo, 1 ); + + if( parameters.getParameter< String >( "device" ) == "cuda" ) { + writeParameter< String >( "CUDA GPU info", "" ); + // TODO: Printing all devices does not make sense until TNL can actually + // use more than one device for computations. Printing only the active + // device for now... + // int devices = getNumberOfDevices(); + // writeParameter< int >( "Number of devices", devices, 1 ); + // for( int i = 0; i < devices; i++ ) + // { + // logger.writeParameter< int >( "Device no.", i, 1 ); + const int i = Devices::CudaDeviceInfo::getActiveDevice(); + writeParameter< String >( "Name", Devices::CudaDeviceInfo::getDeviceName( i ), 2 ); + const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( i ) ) + "." + + convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( i ) ); + writeParameter< String >( "Architecture", deviceArch, 2 ); + writeParameter< int >( "CUDA cores", Devices::CudaDeviceInfo::getCudaCores( i ), 2 ); + const double clockRate = ( double ) Devices::CudaDeviceInfo::getClockRate( i ) / 1.0e3; + writeParameter< double >( "Clock rate (in MHz)", clockRate, 2 ); + const double globalMemory = ( double ) Devices::CudaDeviceInfo::getGlobalMemory( i ) / 1.0e9; + writeParameter< double >( "Global memory (in GB)", globalMemory, 2 ); + const double memoryClockRate = ( double ) Devices::CudaDeviceInfo::getMemoryClockRate( i ) / 1.0e3; + writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRate, 2 ); + writeParameter< bool >( "ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( i ), 2 ); + // } + } return true; }