Skip to content
Snippets Groups Projects
Commit 49f4459a authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Added parser of command line parameters to tnl-cuda-benchmarks

parent 9cb9ea5b
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,19 @@ ...@@ -3,6 +3,19 @@
import sys import sys
import collections import collections
def getSortKey(value):
    """Normalize a table-cell value into a sortable key.

    Numeric strings become int/float so rows sort numerically rather than
    lexicographically; non-numeric non-empty values sort as-is; None and
    empty strings collapse to 0 so they group together at the numeric end.
    """
    # try to convert to number if possible
    try:
        return int(value)
    # int(None) raises TypeError (not ValueError), so catch both —
    # otherwise getSortKey(None) crashes instead of returning 0
    except (ValueError, TypeError):
        try:
            return float(value)
        except (ValueError, TypeError):
            if value:
                return value
            # None or empty string
            return 0
class columnFormating: class columnFormating:
def __init__( self, data ): def __init__( self, data ):
...@@ -305,7 +318,7 @@ class logToHtmlConvertor: ...@@ -305,7 +318,7 @@ class logToHtmlConvertor:
# TODO: check this # TODO: check this
# sort again (just in case, previous sorting might compare values from # sort again (just in case, previous sorting might compare values from
# different columns) # different columns)
self.tableRows.sort(key=lambda row: list(row.values())) self.tableRows.sort(key=lambda row: [getSortKey(value) for value in row.values()])
def countSubcolumns( self ): def countSubcolumns( self ):
for path, col in self.tableColumns.items(): for path, col in self.tableColumns.items():
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#ifndef TNLCUDABENCHMARKS_H_ #ifndef TNLCUDABENCHMARKS_H_
#define TNLCUDBENCHMARKS_H_ #define TNLCUDBENCHMARKS_H_
#include <config/tnlConfigDescription.h>
#include <config/tnlParameterContainer.h>
#include <core/tnlList.h> #include <core/tnlList.h>
#include <matrices/tnlCSRMatrix.h> #include <matrices/tnlCSRMatrix.h>
#include <matrices/tnlEllpackMatrix.h> #include <matrices/tnlEllpackMatrix.h>
...@@ -54,7 +56,7 @@ int setHostTestMatrix( Matrix& matrix, ...@@ -54,7 +56,7 @@ int setHostTestMatrix( Matrix& matrix,
matrix.setElement( row, col + element, element + 1 ); matrix.setElement( row, col + element, element + 1 );
elements++; elements++;
} }
} }
} }
return elements; return elements;
} }
...@@ -67,13 +69,13 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix, ...@@ -67,13 +69,13 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix,
const int rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; const int rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( rowIdx >= matrix->getRows() ) if( rowIdx >= matrix->getRows() )
return; return;
int col = rowIdx - elementsPerRow / 2; int col = rowIdx - elementsPerRow / 2;
for( int element = 0; element < elementsPerRow; element++ ) for( int element = 0; element < elementsPerRow; element++ )
{ {
if( col + element >= 0 && if( col + element >= 0 &&
col + element < matrix->getColumns() ) col + element < matrix->getColumns() )
matrix->setElementFast( rowIdx, col + element, element + 1 ); matrix->setElementFast( rowIdx, col + element, element + 1 );
} }
} }
template< typename Matrix > template< typename Matrix >
...@@ -186,79 +188,115 @@ benchmarkSpMV( Benchmark & benchmark, ...@@ -186,79 +188,115 @@ benchmarkSpMV( Benchmark & benchmark,
return true; return true;
} }
int main( int argc, char* argv[] ) template< typename Real >
void
runCudaBenchmarks( Benchmark & benchmark,
Benchmark::MetadataMap metadata,
const unsigned & minSize,
const unsigned & maxSize,
const unsigned & loops,
const unsigned & elementsPerRow )
{
const tnlString precision = getType< Real >();
metadata["precision"] = precision;
// Array operations
benchmark.newBenchmark( tnlString("Array operations (") + precision + ")",
metadata );
for( unsigned size = minSize; size <= maxSize; size *= 2 ) {
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{"size", size},
} ));
benchmarkArrayOperations< Real >( benchmark, loops, size );
}
// Vector operations
benchmark.newBenchmark( tnlString("Vector operations (") + precision + ")",
metadata );
for( unsigned size = minSize; size <= maxSize; size *= 2 ) {
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{"size", size},
} ));
benchmarkVectorOperations< Real >( benchmark, loops, size );
}
// Sparse matrix-vector multiplication
benchmark.newBenchmark( tnlString("Sparse matrix-vector multiplication (") + precision + ")",
metadata );
for( unsigned size = minSize; size <= maxSize; size *= 2 ) {
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{"rows", size},
{"columns", size},
{"elements per row", elementsPerRow},
} ));
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
benchmarkSpMV< Real, tnlCSRMatrix >( benchmark, loops, size, elementsPerRow );
benchmarkSpMV< Real, tnlEllpackMatrix >( benchmark, loops, size, elementsPerRow );
benchmarkSpMV< Real, SlicedEllpackMatrix >( benchmark, loops, size, elementsPerRow );
benchmarkSpMV< Real, tnlChunkedEllpackMatrix >( benchmark, loops, size, elementsPerRow );
}
}
// Declares the command-line options accepted by tnl-cuda-benchmarks
// (names, help texts, defaults) for the TNL parameter parser.
void
setupConfig( tnlConfigDescription & config )
{
   config.addDelimiter( "Benchmark settings:" );
   config.addEntry< tnlString >( "log-file", "Log file name.", "tnl-cuda-benchmarks.log");
   config.addEntry< tnlString >( "precision", "Precision of the arithmetics.", "double" );
   config.addEntryEnum( "float" );
   config.addEntryEnum( "double" );
   config.addEntryEnum( "all" );
   config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark (next size is 2*min-size and so on, up to max-size).", 100000 );
   // was a copy-paste of the min-size text ("Minimum size ...")
   config.addEntry< int >( "max-size", "Maximum size of arrays/vectors used in the benchmark (next size is 2*min-size and so on, up to max-size).", 10000000 );
   // default was the double literal 1.0 for an int entry; restore the
   // pre-refactoring default of 10 loops
   config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 );
   config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 );
   config.addEntry< int >( "verbose", "Verbose mode.", 1 );
}
// Entry point: parses command-line parameters, runs the CUDA benchmarks for
// the requested precision(s), and writes the results to the log file.
// Returns EXIT_SUCCESS on success, nonzero on parse or I/O failure; without
// CUDA support it only prints a message and fails.
int
main( int argc, char* argv[] )
{
#ifdef HAVE_CUDA
   tnlParameterContainer parameters;
   tnlConfigDescription conf_desc;

   setupConfig( conf_desc );

   if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) {
      conf_desc.printUsage( argv[ 0 ] );
      return 1;
   }

   ofstream logFile( parameters.getParameter< tnlString >( "log-file" ).getString() );
   const tnlString & precision = parameters.getParameter< tnlString >( "precision" );
   const unsigned minSize = parameters.getParameter< unsigned >( "min-size" );
   const unsigned maxSize = parameters.getParameter< unsigned >( "max-size" );
   const unsigned loops = parameters.getParameter< unsigned >( "loops" );
   const unsigned elementsPerRow = parameters.getParameter< unsigned >( "elements-per-row" );
   const unsigned verbose = parameters.getParameter< unsigned >( "verbose" );

   // init benchmark and common metadata
   Benchmark benchmark( loops, verbose );
   // TODO: add hostname, CPU info, GPU info, date, ...
   Benchmark::MetadataMap metadata {
//      {"key", value},
   };

   // "all" runs both precisions; each call adds its own "precision" metadata
   if( precision == "all" || precision == "float" )
      runCudaBenchmarks< float >( benchmark, metadata, minSize, maxSize, loops, elementsPerRow );
   if( precision == "all" || precision == "double" )
      runCudaBenchmarks< double >( benchmark, metadata, minSize, maxSize, loops, elementsPerRow );

   if( ! benchmark.save( logFile ) ) {
      cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< tnlString >( "log-file" ) << "'." << endl;
      return EXIT_FAILURE;
   }

   return EXIT_SUCCESS;
#else
   tnlCudaSupportMissingMessage;
   return EXIT_FAILURE;
#endif
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment