Fixed BLAS benchmark (c4d018b8) · Commits · TNL / tnl-dev

src/Benchmarks/BLAS/array-operations.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -30,7 +30,7 @@ benchmarkArrayOperations( Benchmark & benchmark,
		typedef Containers::Array< Real, Devices::Cuda, Index > CudaArray;
		using namespace std;

		-double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
		+double datasetSize = (double) size * sizeof( Real ) / oneGB;

		HostArray hostArray, hostArray2;
		CudaArray deviceArray, deviceArray2;

+1 −1

Original line number	Diff line number	Diff line
		@@ -139,7 +139,7 @@ benchmarkSpMV( Benchmark & benchmark,

		const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow );
		setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow );
		-const double datasetSize = ( double ) loops * elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
		+const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;

		// reset function
		auto reset = [&]() {

+3 −3

Original line number	Diff line number	Diff line
		@@ -36,7 +36,7 @@ benchmarkVectorOperations( Benchmark & benchmark,
		typedef Containers::Vector< Real, Devices::Cuda, Index > CudaVector;
		using namespace std;

		-double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
		+double datasetSize = (double) size * sizeof( Real ) / oneGB;

		HostVector hostVector, hostVector2;
		CudaVector deviceVector, deviceVector2;
		@@ -252,7 +252,7 @@ benchmarkVectorOperations( Benchmark & benchmark,
		hostVector.computePrefixSum();
		timer.stop();
		timeHost = timer.getTime();
		-bandwidth = 2 * datasetSize / loops / timer.getTime();
		+bandwidth = 2 * datasetSize / timer.getTime();
		std::cout << " CPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;

		timer.reset();
		@@ -260,7 +260,7 @@ benchmarkVectorOperations( Benchmark & benchmark,
		deviceVector.computePrefixSum();
		timer.stop();
		timeDevice = timer.getTime();
		-bandwidth = 2 * datasetSize / loops / timer.getTime();
		+bandwidth = 2 * datasetSize / timer.getTime();
		std::cout << " GPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;
		std::cout << " CPU/GPU speedup: " << timeHost / timeDevice << std::endl;

+3 −1

Original line number	Diff line number	Diff line
		@@ -504,7 +504,9 @@ Benchmark::MetadataMap getHardwareMetadata()
		{ "system release", Devices::SystemInfo::getSystemRelease() },
		{ "start time", Devices::SystemInfo::getCurrentTime() },
		#ifdef HAVE_MPI
		-{ "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) },
		+{ "number of MPI processes", (Communicators::MpiCommunicator::IsInitialized())
		+? Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup )
		+: 1 },
		#endif
		{ "OpenMP enabled", Devices::Host::isOMPEnabled() },
		{ "OpenMP threads", Devices::Host::getMaxThreadsCount() },