Commit 7dce286c authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Jakub Klinkovský
Browse files

Fixed grid traversers benchmark.

parent 56f0c672
Loading
Loading
Loading
Loading
+0 −109
Original line number Diff line number Diff line
/***************************************************************************
                          WriteOne.h  -  description
                             -------------------
    begin                : Dec 19, 2018
    copyright            : (C) 2018 by oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

// Implemented by: Tomas Oberhuber

#pragma once

#include <TNL/ParallelFor.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Containers/Vector.h>

namespace TNL {
   namespace Benchmarks {
      

/**
 * \brief Benchmark helper that writes the value 1 into every cell of a
 * \e Dimensions-dimensional grid stored as one contiguous vector.
 *
 * The primary template is intentionally empty; only the specializations
 * for dimensions 1, 2 and 3 below provide an implementation.
 *
 * \tparam Dimensions grid dimension (1, 2 or 3).
 * \tparam Device     execution device (Devices::Host or Devices::Cuda).
 * \tparam Real       value type of the grid cells.
 * \tparam Index      index type used by the traversal lambda.
 */
template< int Dimensions,
          typename Device,
          typename Real,
          typename Index >
class WriteOne{};

template< typename Device,
          typename Real,
          typename Index >
class WriteOne< 1, Device, Real, Index >
{
   public:
      
      using Vector = Containers::Vector< Real, Device, Index >;
      
      static void run( std::size_t size )
      {
         Vector v( size );
         auto writeOne = [] __cuda_callable__ ( Index i, Real* data )
         {
            data[ i ] = 1.0;
         };
         
         ParallelFor< Device >::exec( ( std::size_t ) 0, size, writeOne, v.getData() );
      }
};


template< typename Device,
          typename Real,
          typename Index >
class WriteOne< 2, Device, Real, Index >
{
   public:
      
      using Vector = Containers::Vector< Real, Device, Index >;
      
      static void run( std::size_t size )
      {
         Vector v( size * size );
         auto writeOne = [=] __cuda_callable__ ( Index i, Index j,  Real* data )
         {
            data[ i * size + j ] = 1.0;
         };
         
         ParallelFor2D< Device >::exec( ( std::size_t ) 0,
                                        ( std::size_t ) 0,
                                        size,
                                        size,
                                        writeOne, v.getData() );         
      }
};

template< typename Device,
          typename Real,
          typename Index >
class WriteOne< 3, Device, Real, Index >
{
   public:
      
      using Vector = Containers::Vector< Real, Device, Index >;
      
      static void run( std::size_t size )
      {
         Vector v( size * size * size );
         auto writeOne = [=] __cuda_callable__ ( Index i, Index j, Index k, Real* data )
         {
            data[ ( i * size + j ) * size + k ] = 1.0;
         };
         
         ParallelFor3D< Device >::exec( ( std::size_t ) 0, 
                                        ( std::size_t ) 0, 
                                        ( std::size_t ) 0, 
                                        size,
                                        size,
                                        size,
                                        writeOne, v.getData() );         
      }
};


   } // namespace Benchmarks
} // namespace TNL


+1 −19
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@
#pragma once

#include "../Benchmarks.h"
#include "WriteOne.h"


#include <TNL/Containers/Vector.h>

@@ -29,24 +29,6 @@ class benchmarkTraversingFullGrid

      static void run ( Benchmark& benchmark, std::size_t size )
      {
         auto reset = [&]()
         {};
         
         auto testHost = [&] ()
         {
            WriteOne< Dimension, Devices::Host, Real, Index >::run( size );
         }; 
         
         auto testCuda = [&] ()
         {
            WriteOne< Dimension, Devices::Cuda, Real, Index >::run( size );
         }; 
         
         benchmark.setOperation( "writeOne", size * sizeof( Real ) );
         benchmark.time( reset, "CPU", testHost );
#ifdef HAVE_CUDA
         benchmark.time( reset, "GPU", testCuda );
#endif

      }
};
+56 −30
Original line number Diff line number Diff line
@@ -13,7 +13,8 @@
#pragma once

#include "../Benchmarks.h"
#include "grid-traversing.h"
//#include "grid-traversing.h"
#include "GridTraversersBenchmark.h"

#include <TNL/Config/ConfigDescription.h>
#include <TNL/Devices/Host.h>
@@ -23,29 +24,10 @@
using namespace TNL;
using namespace TNL::Benchmarks;

void setupConfig( Config::ConfigDescription& config )
{
   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
   config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
   config.addEntryEnum( "append" );
   config.addEntryEnum( "overwrite" );
   config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
   config.addEntryEnum( "float" );
   config.addEntryEnum( "double" );
   config.addEntryEnum( "all" );
   config.addEntry< int >( "dimension", "Set the problem dimension. 0 means all dimensions 1,2 and 3.", 0 );   
   config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark.", 10 );
   config.addEntry< int >( "max-size", "Maximum size of arrays/vectors used in the benchmark.", 1000 );
   config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
   config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 );   
   config.addEntry< int >( "verbose", "Verbose mode.", 1 );

   config.addDelimiter( "Device settings:" );
   Devices::Host::configSetup( config );
   Devices::Cuda::configSetup( config );   
}

template< int Dimension >
template< int Dimension,
          typename Real = float,
          typename Index = int >
bool runBenchmark( const Config::ParameterContainer& parameters,
                   Benchmark& benchmark,
                   Benchmark::MetadataMap& metadata )
@@ -62,14 +44,59 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
   benchmark.newBenchmark( String("Full grid traversing " + convertToString( Dimension ) + "D" ), metadata );
   for( std::size_t size = minSize; size <= maxSize; size *= 2 )
   {
      benchmark.setMetadataColumns( Benchmark::MetadataColumns({
         {"size", convertToString( size ) },
      } ));
      benchmarkTraversingFullGrid< Dimension >::run( benchmark, size );

      GridTraversersBenchmark< Dimension, Devices::Host, Real, Index > hostTraverserBenchmark( size );
      GridTraversersBenchmark< Dimension, Devices::Cuda, Real, Index > cudaTraverserBenchmark( size );         

      auto reset = [&]() {};
      
      benchmark.setMetadataColumns(
         Benchmark::MetadataColumns( 
            {  {"size", convertToString( size ) }, } ) );

      auto hostWriteOne = [&] ()
      {
         hostTraverserBenchmark.writeOne();
      }; 

      auto cudaWriteOne = [&] ()
      {
         cudaTraverserBenchmark.writeOne();
      }; 

      benchmark.setOperation( "writeOne", size * sizeof( Real ) );
      benchmark.time( reset, "CPU", hostWriteOne );
#ifdef HAVE_CUDA
      benchmark.time( reset, "GPU", cudaWriteOne );
#endif
      
   }   
   return true;
}

void setupConfig( Config::ConfigDescription& config )
{
   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
   config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
   config.addEntryEnum( "append" );
   config.addEntryEnum( "overwrite" );
   config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
   config.addEntryEnum( "float" );
   config.addEntryEnum( "double" );
   config.addEntryEnum( "all" );
   config.addEntry< int >( "dimension", "Set the problem dimension. 0 means all dimensions 1,2 and 3.", 0 );   
   config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark.", 10 );
   config.addEntry< int >( "max-size", "Maximum size of arrays/vectors used in the benchmark.", 1000 );
   config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
   config.addEntry< bool >( "verbose", "Verbose mode.", true );

   Benchmark::configSetup( config );
   
   config.addDelimiter( "Device settings:" );
   Devices::Host::configSetup( config );
   Devices::Cuda::configSetup( config );   
}

template< int Dimension >
bool setupBenchmark( const Config::ParameterContainer& parameters )
{
@@ -77,10 +104,9 @@ bool setupBenchmark( const Config::ParameterContainer& parameters )
   const String & outputMode = parameters.getParameter< String >( "output-mode" );
   const String & precision = parameters.getParameter< String >( "precision" );
   const unsigned sizeStepFactor = parameters.getParameter< unsigned >( "size-step-factor" );
   const unsigned loops = parameters.getParameter< unsigned >( "loops" );
   const unsigned verbose = parameters.getParameter< unsigned >( "verbose" );
   
   Benchmark benchmark( loops, verbose );

   Benchmark benchmark; //( loops, verbose );
   Benchmark::MetadataMap metadata = getHardwareMetadata();
   runBenchmark< Dimension >( parameters, benchmark, metadata );
   
+6 −9
Original line number Diff line number Diff line
INSTALL( FILES matrix-market
               florida-matrix-market
               get-matrices
               convert-matrices
               draw-matrices
INSTALL( FILES tnl-run-heat-equation-benchmark
               run-tnl-benchmark-spmv
               run-tnl-benchmark-traversers
               run-matrix-solvers-benchmark
               run-tnl-benchmark-spmv
               run-tnl-benchmark-linear-solvers
               tnl-run-heat-equation-benchmark
               cuda-profiler.conf
               process-cuda-profile.pl 
               
               DESTINATION ${TNL_TARGET_DATA_DIRECTORY}/benchmark-scripts )

INSTALL( FILES tnl-run-spmv-benchmark
INSTALL( FILES run-tnl-benchmark-spmv
               run-tnl-benchmark-traversers
         DESTINATION bin
         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )