/***************************************************************************
                     tnl-benchmark-traversers.h  -  description
                             -------------------
    begin                : Dec 17, 2018
    copyright            : (C) 2018 by oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Tomas Oberhuber
#pragma once
#include "../Benchmarks.h"
//#include "grid-traversing.h"
#include "GridTraversersBenchmark.h"
#include <TNL/Config/ConfigDescription.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/ParallelFor.h>
using namespace TNL;
using namespace TNL::Benchmarks;
using namespace TNL::Benchmarks::Traversers;
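// Runs the grid traversing benchmarks for the given Dimension. Every operation
// writes a value into each cell of a grid with size^Dimension cells, first
// without and then with distinguishing boundary cells, and is timed with a
// plain C loop, with ParallelFor and with a TNL traverser, on the host and,
// when HAVE_CUDA is defined, on a CUDA device. The grid size is doubled in
// each step from min-size up to max-size; variants suffixed with "RST" call
// the reset lambda before the measured runs.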
template< int Dimension,
          typename Real = float,
          typename Index = int >
bool runBenchmark( const Config::ParameterContainer& parameters,
                   Benchmark& benchmark,
                   Benchmark::MetadataMap& metadata )
{
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values
// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
const int minSize = parameters.getParameter< int >( "min-size" );
const int maxSize = parameters.getParameter< int >( "max-size" );
/****
 * Full grid traversing without boundary conditions
 */
benchmark.newBenchmark( String( "Traversing without boundary conditions " + convertToString( Dimension ) + "D" ), metadata );
for( std::size_t size = minSize; size <= maxSize; size *= 2 )
{
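// One benchmark fixture per device; each holds its own data for a grid with
// size^Dimension cells.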
GridTraversersBenchmark< Dimension, Devices::Host, Real, Index > hostTraverserBenchmark( size );
GridTraversersBenchmark< Dimension, Devices::Cuda, Real, Index > cudaTraverserBenchmark( size );
auto hostReset = [&]()
{
hostTraverserBenchmark.reset();
};
auto cudaReset = [&]()
{
cudaTraverserBenchmark.reset();
};
benchmark.setMetadataColumns(
Benchmark::MetadataColumns(
{ {"size", convertToString( size ) }, } ) );
/****
* Write one using C for
*/
auto hostWriteOneUsingPureC = [&] ()
{
hostTraverserBenchmark.writeOneUsingPureC();
};
auto cudaWriteOneUsingPureC = [&] ()
{
cudaTraverserBenchmark.writeOneUsingPureC();
};
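// The second argument of setOperation() is the amount of data written by one
// operation in GB ( size^Dimension cells times sizeof( Real ) bytes ), which
// the Benchmark class uses to report bandwidth.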
benchmark.setOperation( "Pure C", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingPureC );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingPureC );
#endif
benchmark.setOperation( "Pure C RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingPureC );
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingPureC );
/****
* Write one using parallel for
*/
auto hostWriteOneUsingParallelFor = [&] ()
{
hostTraverserBenchmark.writeOneUsingParallelFor();
};
auto cudaWriteOneUsingParallelFor = [&] ()
{
cudaTraverserBenchmark.writeOneUsingParallelFor();
};
benchmark.setOperation( "parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingParallelFor );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingParallelFor );
#endif
benchmark.setOperation( "parallel for RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingParallelFor );
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingParallelFor );
/****
* Write one using traverser
*/
auto hostWriteOneUsingTraverser = [&] ()
{
hostTraverserBenchmark.writeOneUsingTraverser();
};
auto cudaWriteOneUsingTraverser = [&] ()
{
cudaTraverserBenchmark.writeOneUsingTraverser();
};
benchmark.setOperation( "traverser", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingTraverser );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingTraverser );
#endif
benchmark.setOperation( "traverser RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingTraverser );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingTraverser );
#endif
}
/****
 * Grid traversing with boundary conditions
 */
benchmark.newBenchmark( String( "Traversing with boundary conditions " + convertToString( Dimension ) + "D" ), metadata );
for( std::size_t size = minSize; size <= maxSize; size *= 2 )
{
GridTraversersBenchmark< Dimension, Devices::Host, Real, Index > hostTraverserBenchmark( size );
GridTraversersBenchmark< Dimension, Devices::Cuda, Real, Index > cudaTraverserBenchmark( size );
auto hostReset = [&]()
{
hostTraverserBenchmark.reset();
};
auto cudaReset = [&]()
{
cudaTraverserBenchmark.reset();
};
benchmark.setMetadataColumns(
Benchmark::MetadataColumns(
{ {"size", convertToString( size ) }, } ) );
/****
 * Traverse using pure C for loop
 */
auto hostTraverseUsingPureC = [&] ()
{
hostTraverserBenchmark.traverseUsingPureC();
};
auto cudaTraverseUsingPureC = [&] ()
{
cudaTraverserBenchmark.traverseUsingPureC();
};
benchmark.setOperation( "Pure C", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingPureC );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingPureC );
#endif
benchmark.setOperation( "Pure C RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingPureC );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingPureC );
#endif
/****
 * Traverse using parallel for
 */
auto hostTraverseUsingParallelFor = [&] ()
{
hostTraverserBenchmark.writeOneUsingParallelFor();
};
auto cudaTraverseUsingParallelFor = [&] ()
{
cudaTraverserBenchmark.writeOneUsingParallelFor();
};
benchmark.setOperation( "parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingParallelFor );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingParallelFor );
#endif
benchmark.setOperation( "parallel for RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingParallelFor );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingParallelFor );
#endif
/****
 * Traverse using traverser
 */
auto hostTraverseUsingTraverser = [&] ()
{
hostTraverserBenchmark.writeOneUsingTraverser();
};
auto cudaTraverseUsingTraverser = [&] ()
{
cudaTraverserBenchmark.writeOneUsingTraverser();
};
benchmark.setOperation( "traverser", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingTraverser );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingTraverser );
#endif
benchmark.setOperation( "traverser RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingTraverser );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingTraverser );
#endif
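// Declares the command line options of the benchmark: the log file and its
// output mode, the precision, the problem dimension, the range of grid sizes
// with the size step factor, and the generic Benchmark, Host and Cuda settings.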
void setupConfig( Config::ConfigDescription& config )
{
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-traversers.log");
config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
config.addEntryEnum( "append" );
config.addEntryEnum( "overwrite" );
config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
config.addEntryEnum( "float" );
config.addEntryEnum( "double" );
config.addEntryEnum( "all" );
config.addEntry< int >( "dimension", "Set the problem dimension. 0 means all dimensions 1,2 and 3.", 0 );
config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark.", 10 );
config.addEntry< int >( "max-size", "Minimum size of arrays/vectors used in the benchmark.", 1000 );
config.addEntry< int >( "size-step-factor", "Factor determining the size of arrays/vectors used in the benchmark. First size is min-size and each following size is stepFactor*previousSize, up to max-size.", 2 );
Benchmark::configSetup( config );
config.addDelimiter( "Device settings:" );
Devices::Host::configSetup( config );
Devices::Cuda::configSetup( config );
}
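// Sets up and runs the benchmark for one grid dimension: configures the
// Benchmark object from the parameters, collects the hardware metadata, calls
// runBenchmark< Dimension >() and stores the results in the requested log file.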
template< int Dimension >
bool setupBenchmark( const Config::ParameterContainer& parameters )
{
const String & logFileName = parameters.getParameter< String >( "log-file" );
const String & outputMode = parameters.getParameter< String >( "output-mode" );
const String & precision = parameters.getParameter< String >( "precision" );
const unsigned sizeStepFactor = parameters.getParameter< unsigned >( "size-step-factor" );
Benchmark benchmark; //( loops, verbose );
benchmark.setup( parameters );
Benchmark::MetadataMap metadata = getHardwareMetadata();
runBenchmark< Dimension >( parameters, benchmark, metadata );
auto mode = std::ios::out;
if( outputMode == "append" )
mode |= std::ios::app;
std::ofstream logFile( logFileName.getString(), mode );
if( ! benchmark.save( logFile ) )
{
std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl;
return false;
}
return true;
}
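// Parses the command line, initializes the Host and Cuda devices and runs the
// benchmark for the requested dimension(s); --dimension 0 runs 1D, 2D and 3D.
// A hypothetical invocation (the actual binary name may differ):
//    tnl-benchmark-traversers --dimension 2 --min-size 16 --max-size 512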
int main( int argc, char* argv[] )
{
Config::ConfigDescription config;
Config::ParameterContainer parameters;
setupConfig( config );
if( ! parseCommandLine( argc, argv, config, parameters ) ) {
config.printUsage( argv[ 0 ] );
return EXIT_FAILURE;
}
if( ! Devices::Host::setup( parameters ) ||
! Devices::Cuda::setup( parameters ) )
return EXIT_FAILURE;
const int dimension = parameters.getParameter< int >( "dimension" );
bool status( false );
if( ! dimension )
{
status = setupBenchmark< 1 >( parameters );
status |= setupBenchmark< 2 >( parameters );
status |= setupBenchmark< 3 >( parameters );
}
else
{
switch( dimension )
{
case 1:
status = setupBenchmark< 1 >( parameters );
break;
case 2:
status = setupBenchmark< 2 >( parameters );
break;
case 3:
status = setupBenchmark< 3 >( parameters );
break;
}
}
if( status == false )
return EXIT_FAILURE;
return EXIT_SUCCESS;
}