Loading src/Benchmarks/BLAS/tnl-benchmark-blas.h +2 −37 Original line number Diff line number Diff line Loading @@ -13,8 +13,7 @@ #pragma once #include <TNL/Devices/Host.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/Cuda.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> Loading @@ -26,9 +25,6 @@ using namespace TNL; using namespace TNL::Benchmarks; // TODO: should benchmarks check the result of the computation? template< typename Real > void runBlasBenchmarks( Benchmark & benchmark, Loading Loading @@ -146,38 +142,7 @@ main( int argc, char* argv[] ) Benchmark benchmark( loops, verbose ); // prepare global metadata const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; Benchmark::MetadataMap metadata = getHardwareMetadata(); if( precision == "all" || precision == "float" ) runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow ); Loading src/Benchmarks/Benchmarks.h +49 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,11 @@ #include <TNL/String.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Communicators/MpiCommunicator.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -452,5 +457,49 @@ protected: Solvers::IterativeSolverMonitor< double, int > monitor; }; Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) }, #endif { "OpenMP enabled", Devices::Host::isOMPEnabled() }, { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +3 −41 Original line number Diff line number Diff line Loading @@ -16,12 +16,10 @@ #include <TNL/Debugging/FPE.h> #endif #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/NoDistrCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> Loading Loading @@ -340,43 +338,7 @@ main( int argc, char* argv[] ) Benchmark benchmark( loops, verbose ); // prepare global metadata const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", CommunicatorType::GetSize( CommunicatorType::AllGroup ) }, #endif { "OpenMP enabled", Devices::Host::isOMPEnabled() }, { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; Benchmark::MetadataMap metadata = getHardwareMetadata(); // TODO: implement resolveMatrixType // return ! Matrices::resolveMatrixType< MainConfig, Loading Loading
src/Benchmarks/BLAS/tnl-benchmark-blas.h +2 −37 Original line number Diff line number Diff line Loading @@ -13,8 +13,7 @@ #pragma once #include <TNL/Devices/Host.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/Cuda.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> Loading @@ -26,9 +25,6 @@ using namespace TNL; using namespace TNL::Benchmarks; // TODO: should benchmarks check the result of the computation? template< typename Real > void runBlasBenchmarks( Benchmark & benchmark, Loading Loading @@ -146,38 +142,7 @@ main( int argc, char* argv[] ) Benchmark benchmark( loops, verbose ); // prepare global metadata const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; Benchmark::MetadataMap metadata = getHardwareMetadata(); if( precision == "all" || precision == "float" ) runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow ); Loading
src/Benchmarks/Benchmarks.h +49 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,11 @@ #include <TNL/String.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Communicators/MpiCommunicator.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -452,5 +457,49 @@ protected: Solvers::IterativeSolverMonitor< double, int > monitor; }; Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) }, #endif { "OpenMP enabled", Devices::Host::isOMPEnabled() }, { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +3 −41 Original line number Diff line number Diff line Loading @@ -16,12 +16,10 @@ #include <TNL/Debugging/FPE.h> #endif #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/NoDistrCommunicator.h> #include <TNL/Communicators/ScopedInitializer.h> Loading Loading @@ -340,43 +338,7 @@ main( int argc, char* argv[] ) Benchmark benchmark( loops, verbose ); // prepare global metadata const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", CommunicatorType::GetSize( CommunicatorType::AllGroup ) }, #endif { "OpenMP enabled", Devices::Host::isOMPEnabled() }, { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; Benchmark::MetadataMap metadata = getHardwareMetadata(); // TODO: implement resolveMatrixType // return ! Matrices::resolveMatrixType< MainConfig, Loading