Refactoring benchmarks - common getHardwareMetadata function (51d16ef0) · Commits · TNL / tnl-dev

src/Benchmarks/BLAS/tnl-benchmark-blas.h

+2 −37

Original line number	Diff line number	Diff line
		@@ -13,8 +13,7 @@
		#pragma once

		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/CudaDeviceInfo.h>
		#include <TNL/Devices/SystemInfo.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Config/ConfigDescription.h>
		#include <TNL/Config/ParameterContainer.h>

		@@ -26,9 +25,6 @@ using namespace TNL;
		using namespace TNL::Benchmarks;


		// TODO: should benchmarks check the result of the computation?


		template< typename Real >
		void
		runBlasBenchmarks( Benchmark & benchmark,
		@@ -146,38 +142,7 @@ main( int argc, char* argv[] )
		Benchmark benchmark( loops, verbose );

		// prepare global metadata
		const int cpu_id = 0;
		Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
		String cacheInfo = String( cacheSizes.L1data ) + ", "
		+ String( cacheSizes.L1instruction ) + ", "
		+ String( cacheSizes.L2 ) + ", "
		+ String( cacheSizes.L3 );
		#ifdef HAVE_CUDA
		const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice();
		const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
		String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) );
		#endif
		Benchmark::MetadataMap metadata {
		{ "host name", Devices::SystemInfo::getHostname() },
		{ "architecture", Devices::SystemInfo::getArchitecture() },
		{ "system", Devices::SystemInfo::getSystemName() },
		{ "system release", Devices::SystemInfo::getSystemRelease() },
		{ "start time", Devices::SystemInfo::getCurrentTime() },
		{ "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) },
		{ "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) },
		{ "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) },
		{ "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 },
		{ "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
		#ifdef HAVE_CUDA
		{ "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) },
		{ "GPU architecture", deviceArch },
		{ "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) },
		{ "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 },
		{ "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 },
		{ "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 },
		{ "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) },
		#endif
		};
		Benchmark::MetadataMap metadata = getHardwareMetadata();

		if( precision == "all" \|\| precision == "float" )
		runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow );

src/Benchmarks/Benchmarks.h

+49 −0

Original line number	Diff line number	Diff line
		@@ -21,6 +21,11 @@
		#include <TNL/String.h>
		#include <TNL/Solvers/IterativeSolverMonitor.h>

		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/SystemInfo.h>
		#include <TNL/Devices/CudaDeviceInfo.h>
		#include <TNL/Communicators/MpiCommunicator.h>

		namespace TNL {
		namespace Benchmarks {

		@@ -452,5 +457,49 @@ protected:
		Solvers::IterativeSolverMonitor< double, int > monitor;
		};


		/**
		 * \brief Collects metadata describing the machine the benchmark runs on.
		 *
		 * The returned map always contains the host/system identification, the
		 * start time, OpenMP settings and information about CPU number 0. When
		 * compiled with HAVE_MPI it also contains the number of MPI processes in
		 * the AllGroup communicator, and when compiled with HAVE_CUDA it contains
		 * a description of the currently active GPU.
		 *
		 * The function is defined in a header, so it is declared \e inline to
		 * avoid multiple-definition link errors when the header is included from
		 * more than one translation unit.
		 *
		 * \return Map of metadata labels to their values.
		 */
		inline Benchmark::MetadataMap getHardwareMetadata()
		{
		   // All CPU-related queries refer to the first CPU.
		   const int cpu_id = 0;

		   const Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
		   const String cacheInfo = String( cacheSizes.L1data ) + ", "
		                          + String( cacheSizes.L1instruction ) + ", "
		                          + String( cacheSizes.L2 ) + ", "
		                          + String( cacheSizes.L3 );

		   // Guard the division: if the number of cores could not be determined
		   // (reported as 0), report 0 threads per core instead of dividing by zero.
		   const auto cpuCores = Devices::SystemInfo::getNumberOfCores( cpu_id );
		   const auto cpuThreads = Devices::SystemInfo::getNumberOfThreads( cpu_id );
		   const auto threadsPerCore = ( cpuCores > 0 ) ? cpuThreads / cpuCores : 0;

		#ifdef HAVE_CUDA
		   const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice();
		   // Architecture is reported as "major.minor".
		   const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
		                             String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) );
		#endif

		   // NOTE(review): the divisions by 1e3 / 1e9 below rescale the raw values to
		   // the unit named in each label; presumably the getters return kHz and
		   // bytes respectively -- confirm against SystemInfo / CudaDeviceInfo.
		   Benchmark::MetadataMap metadata {
		      { "host name", Devices::SystemInfo::getHostname() },
		      { "architecture", Devices::SystemInfo::getArchitecture() },
		      { "system", Devices::SystemInfo::getSystemName() },
		      { "system release", Devices::SystemInfo::getSystemRelease() },
		      { "start time", Devices::SystemInfo::getCurrentTime() },
		#ifdef HAVE_MPI
		      { "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) },
		#endif
		      { "OpenMP enabled", Devices::Host::isOMPEnabled() },
		      { "OpenMP threads", Devices::Host::getMaxThreadsCount() },
		      { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) },
		      { "CPU cores", cpuCores },
		      { "CPU threads per core", threadsPerCore },
		      { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 },
		      { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
		#ifdef HAVE_CUDA
		      { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) },
		      { "GPU architecture", deviceArch },
		      { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) },
		      { "GPU clock rate (MHz)", static_cast< double >( Devices::CudaDeviceInfo::getClockRate( activeGPU ) ) / 1e3 },
		      { "GPU global memory (GB)", static_cast< double >( Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) ) / 1e9 },
		      { "GPU memory clock rate (MHz)", static_cast< double >( Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) ) / 1e3 },
		      { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) },
		#endif
		   };

		   return metadata;
		}

		} // namespace Benchmarks
		} // namespace TNL

src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h

+3 −41

Original line number	Diff line number	Diff line
		@@ -16,12 +16,10 @@
		#include <TNL/Debugging/FPE.h>
		#endif

		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Devices/SystemInfo.h>
		#include <TNL/Devices/CudaDeviceInfo.h>
		#include <TNL/Config/ConfigDescription.h>
		#include <TNL/Config/ParameterContainer.h>
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Communicators/MpiCommunicator.h>
		#include <TNL/Communicators/NoDistrCommunicator.h>
		#include <TNL/Communicators/ScopedInitializer.h>
		@@ -340,43 +338,7 @@ main( int argc, char* argv[] )
		Benchmark benchmark( loops, verbose );

		// prepare global metadata
		const int cpu_id = 0;
		Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id );
		String cacheInfo = String( cacheSizes.L1data ) + ", "
		+ String( cacheSizes.L1instruction ) + ", "
		+ String( cacheSizes.L2 ) + ", "
		+ String( cacheSizes.L3 );
		#ifdef HAVE_CUDA
		const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice();
		const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." +
		String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) );
		#endif
		Benchmark::MetadataMap metadata {
		{ "host name", Devices::SystemInfo::getHostname() },
		{ "architecture", Devices::SystemInfo::getArchitecture() },
		{ "system", Devices::SystemInfo::getSystemName() },
		{ "system release", Devices::SystemInfo::getSystemRelease() },
		{ "start time", Devices::SystemInfo::getCurrentTime() },
		#ifdef HAVE_MPI
		{ "number of MPI processes", CommunicatorType::GetSize( CommunicatorType::AllGroup ) },
		#endif
		{ "OpenMP enabled", Devices::Host::isOMPEnabled() },
		{ "OpenMP threads", Devices::Host::getMaxThreadsCount() },
		{ "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) },
		{ "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) },
		{ "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) },
		{ "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 },
		{ "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo },
		#ifdef HAVE_CUDA
		{ "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) },
		{ "GPU architecture", deviceArch },
		{ "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) },
		{ "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 },
		{ "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 },
		{ "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 },
		{ "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) },
		#endif
		};
		Benchmark::MetadataMap metadata = getHardwareMetadata();

		// TODO: implement resolveMatrixType
		// return ! Matrices::resolveMatrixType< MainConfig,