Benchmarks: added benchmarks for array copy and compare using memcpy and memcmp (7a5840de) · Commits · TNL / tnl-dev

src/Benchmarks/BLAS/array-operations.h

+32 −0

Original line number	Diff line number	Diff line
		@@ -12,6 +12,8 @@

		#pragma once

		#include <cstring>

		#include "../Benchmarks.h"

		#include <TNL/Containers/Array.h>
		@@ -66,6 +68,36 @@ benchmarkArrayOperations( Benchmark & benchmark,
		reset12();


		if( std::is_fundamental< Real >::value ) {
		   // Measurements of the raw C library / CUDA runtime routines. They work on
		   // the underlying bytes, so they are only run for fundamental element types.

		   // std::memcmp: resultHost records whether memcmp reports the two host
		   // buffers as equal over their whole byte length.
		   auto memcmpHost = [&]() {
		      const auto byteCount = hostArray.getSize() * sizeof(Real);
		      resultHost = ( std::memcmp( hostArray.getData(), hostArray2.getData(), byteCount ) == 0 );
		   };
		   benchmark.setOperation( "comparison (memcmp)", 2 * datasetSize );
		   benchmark.time< Devices::Host >( reset12, "CPU", memcmpHost );

		   // std::memcpy: copies the bytes of hostArray2 into hostArray.
		   auto memcpyHost = [&]() {
		      const auto byteCount = hostArray.getSize() * sizeof(Real);
		      std::memcpy( hostArray.getData(), hostArray2.getData(), byteCount );
		   };
		   benchmark.setOperation( "copy (memcpy)", 2 * datasetSize );
		   benchmark.time< Devices::Host >( reset12, "CPU", memcpyHost );
		#ifdef HAVE_CUDA
		   // cudaMemcpy: device-to-device copy of deviceArray2 into deviceArray,
		   // timed under the same "copy (memcpy)" operation as the host variant.
		   auto memcpyCuda = [&]() {
		      const auto byteCount = deviceArray.getSize() * sizeof(Real);
		      cudaMemcpy( deviceArray.getData(), deviceArray2.getData(), byteCount, cudaMemcpyDeviceToDevice );
		      TNL_CHECK_CUDA_DEVICE;
		   };
		   benchmark.time< Devices::Cuda >( reset12, "GPU", memcpyCuda );
		#endif
		}


		// Compares the two host arrays with the array type's own operator== and
		// stores the outcome, converted to int, in resultHost.
		auto compareHost = [&]() {
		   // Named cast instead of a C-style cast: same conversion, but greppable
		   // and unable to silently turn into a reinterpret/const cast.
		   resultHost = static_cast< int >( hostArray == hostArray2 );
		};

+1 −1

Original line number	Diff line number	Diff line
		@@ -182,7 +182,7 @@ main( int argc, char* argv[] )
		// Invoke the BLAS benchmarks for the double element type.
		runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow );

		// Save the benchmark results to the log file; on failure report it on
		// stderr and terminate with EXIT_FAILURE.
		if( ! benchmark.save( logFile ) ) {
		// NOTE(review): the two messages below differ only in how the file name is
		// obtained. The hunk is marked "+1 −1", so they look like the removed and
		// the added version of a single diff line rather than two statements meant
		// to run back to back -- confirm which one belongs in the real file.
		std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
		std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl;
		return EXIT_FAILURE;
		}