Skip to content
Snippets Groups Projects
Commit ab6016d1 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Tomáš Oberhuber
Browse files

Traversers benchmark tests can be controlled from the command line.

parent 57f3b355
No related branches found
No related tags found
1 merge request: !20 Traversers optimizations
...@@ -33,6 +33,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters, ...@@ -33,6 +33,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
Benchmark& benchmark, Benchmark& benchmark,
Benchmark::MetadataMap& metadata ) Benchmark::MetadataMap& metadata )
{ {
const String tests = parameters.getParameter< String >( "tests" );
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(), // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible // which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values // to pass 64-bit integer values
...@@ -72,22 +73,28 @@ bool runBenchmark( const Config::ParameterContainer& parameters, ...@@ -72,22 +73,28 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
hostTraverserBenchmark.writeOneUsingPureC(); hostTraverserBenchmark.writeOneUsingPureC();
}; };
#ifdef HAVE_CUDA
auto cudaWriteOneUsingPureC = [&] () auto cudaWriteOneUsingPureC = [&] ()
{ {
cudaTraverserBenchmark.writeOneUsingPureC(); cudaTraverserBenchmark.writeOneUsingPureC();
}; };
#endif
benchmark.setOperation( "Pure C", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); if( tests == "all" || tests == "no-bc-pure-c")
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingPureC ); {
benchmark.setOperation( "Pure C", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingPureC );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingPureC ); if( withCuda )
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingPureC );
#endif #endif
benchmark.setOperation( "Pure C RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.setOperation( "Pure C RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingPureC );
benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingPureC );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingPureC ); if( withCuda )
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingPureC );
#endif #endif
}
/**** /****
* Write one using parallel for * Write one using parallel for
...@@ -97,22 +104,29 @@ bool runBenchmark( const Config::ParameterContainer& parameters, ...@@ -97,22 +104,29 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
hostTraverserBenchmark.writeOneUsingParallelFor(); hostTraverserBenchmark.writeOneUsingParallelFor();
}; };
#ifdef HAVE_CUDA
auto cudaWriteOneUsingParallelFor = [&] () auto cudaWriteOneUsingParallelFor = [&] ()
{ {
cudaTraverserBenchmark.writeOneUsingParallelFor(); cudaTraverserBenchmark.writeOneUsingParallelFor();
}; };
#endif
benchmark.setOperation( "parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); if( tests == "all" || tests == "no-bc-parallel-for" )
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingParallelFor ); {
benchmark.setOperation( "parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostWriteOneUsingParallelFor );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingParallelFor ); if( withCuda )
benchmark.time< Devices::Cuda >( "GPU", cudaWriteOneUsingParallelFor );
#endif #endif
benchmark.setOperation( "parallel for RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.setOperation( "parallel for RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingParallelFor ); benchmark.time< Devices::Host >( hostReset, "CPU", hostWriteOneUsingParallelFor );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingParallelFor ); if( withCuda )
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaWriteOneUsingParallelFor );
#endif #endif
}
/**** /****
* Write one using traverser * Write one using traverser
...@@ -154,96 +168,129 @@ bool runBenchmark( const Config::ParameterContainer& parameters, ...@@ -154,96 +168,129 @@ bool runBenchmark( const Config::ParameterContainer& parameters,
hostTraverserBenchmark.reset(); hostTraverserBenchmark.reset();
}; };
#ifdef HAVE_CUDA
auto cudaReset = [&]() auto cudaReset = [&]()
{ {
cudaTraverserBenchmark.reset(); cudaTraverserBenchmark.reset();
}; };
#endif
benchmark.setMetadataColumns( benchmark.setMetadataColumns(
Benchmark::MetadataColumns( Benchmark::MetadataColumns(
{ {"size", convertToString( size ) }, } ) ); { {"size", convertToString( size ) }, } ) );
/**** /****
* Write one using C for * Write one and two (as BC) using C for
*/ */
auto hostTraverseUsingPureC = [&] () auto hostTraverseUsingPureC = [&] ()
{ {
hostTraverserBenchmark.traverseUsingPureC(); hostTraverserBenchmark.traverseUsingPureC();
}; };
#ifdef HAVE_CUDA
auto cudaTraverseUsingPureC = [&] () auto cudaTraverseUsingPureC = [&] ()
{ {
cudaTraverserBenchmark.traverseUsingPureC(); cudaTraverserBenchmark.traverseUsingPureC();
}; };
#endif
benchmark.setOperation( "Pure C", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); if( tests == "all" || tests == "bc-pure-c" )
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingPureC ); {
benchmark.setOperation( "Pure C", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingPureC );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingPureC ); if( withCuda )
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingPureC );
#endif #endif
benchmark.setOperation( "Pure C RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.setOperation( "Pure C RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingPureC ); benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingPureC );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingPureC ); if( withCuda )
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingPureC );
#endif #endif
}
/**** /****
* Write one using parallel for * Write one and two (as BC) using parallel for
*/ */
auto hostTraverseUsingParallelFor = [&] () auto hostTraverseUsingParallelFor = [&] ()
{ {
hostTraverserBenchmark.writeOneUsingParallelFor(); hostTraverserBenchmark.writeOneUsingParallelFor();
}; };
#ifdef HAVE_CUDA
auto cudaTraverseUsingParallelFor = [&] () auto cudaTraverseUsingParallelFor = [&] ()
{ {
cudaTraverserBenchmark.writeOneUsingParallelFor(); cudaTraverserBenchmark.writeOneUsingParallelFor();
}; };
#endif
benchmark.setOperation( "parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); if( tests == "all" || tests == "bc-parallel-for" )
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingParallelFor ); {
benchmark.setOperation( "parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingParallelFor );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingParallelFor ); if( withCuda )
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingParallelFor );
#endif #endif
benchmark.setOperation( "parallel for RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.setOperation( "parallel for RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingParallelFor ); benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingParallelFor );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingParallelFor ); if( withCuda )
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingParallelFor );
#endif #endif
}
/**** /****
* Write one using traverser * Write one and two (as BC) using traverser
*/ */
auto hostTraverseUsingTraverser = [&] () auto hostTraverseUsingTraverser = [&] ()
{ {
hostTraverserBenchmark.writeOneUsingTraverser(); hostTraverserBenchmark.writeOneUsingTraverser();
}; };
#ifdef HAVE_CUDA
auto cudaTraverseUsingTraverser = [&] () auto cudaTraverseUsingTraverser = [&] ()
{ {
cudaTraverserBenchmark.writeOneUsingTraverser(); cudaTraverserBenchmark.writeOneUsingTraverser();
}; };
#endif
benchmark.setOperation( "traverser", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); if( tests == "all" || tests == "bc-traverser" )
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingTraverser ); {
benchmark.setOperation( "traverser", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( hostReset, "CPU", hostTraverseUsingTraverser );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingTraverser ); benchmark.time< Devices::Cuda >( cudaReset, "GPU", cudaTraverseUsingTraverser );
#endif #endif
benchmark.setOperation( "traverser RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.setOperation( "traverser RST", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB );
benchmark.time< Devices::Host >( "CPU", hostTraverseUsingTraverser ); benchmark.time< Devices::Host >( "CPU", hostTraverseUsingTraverser );
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingTraverser ); benchmark.time< Devices::Cuda >( "GPU", cudaTraverseUsingTraverser );
#endif #endif
}
} }
return true; return true;
} }
void setupConfig( Config::ConfigDescription& config ) void setupConfig( Config::ConfigDescription& config )
{ {
config.addEntry< String >( "tests", "Tests to be performed.", "all" );
config.addEntryEnum( "all" );
config.addEntryEnum( "no-bc-pure-c" );
config.addEntryEnum( "no-bc-parallel-for" );
config.addEntryEnum( "no-bc-traverser" );
config.addEntryEnum( "bc-pure-c" );
config.addEntryEnum( "bc-parallel-for" );
config.addEntryEnum( "bc-traverser" );
#ifdef HAVE_CUDA
config.addEntry< bool >( "with-cuda", "Perform even the CUDA benchmarks.", true );
#else
config.addEntry< bool >( "with-cuda", "Perform even the CUDA benchmarks.", false );
#endif
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-traversers.log"); config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-traversers.log");
config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" ); config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
config.addEntryEnum( "append" ); config.addEntryEnum( "append" );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment