diff --git a/tests/benchmarks/share/tnl-log-to-html.py b/tests/benchmarks/share/tnl-log-to-html.py index 6a2c4c6315a7490150e54be6be078febd5518490..38adaad4002dc50f7039e006058cb692c6523ade 100644 --- a/tests/benchmarks/share/tnl-log-to-html.py +++ b/tests/benchmarks/share/tnl-log-to-html.py @@ -3,6 +3,19 @@ import sys import collections +def getSortKey(value): + # try to convert to number if possible + try: + return int(value) + except ValueError: + try: + return float(value) + except ValueError: + if value: + return value + # None or empty string + return 0 + class columnFormating: def __init__( self, data ): @@ -305,7 +318,7 @@ class logToHtmlConvertor: # TODO: check this # sort again (just in case, previous sorting might compare values from # different columns) - self.tableRows.sort(key=lambda row: list(row.values())) + self.tableRows.sort(key=lambda row: [getSortKey(value) for value in row.values()]) def countSubcolumns( self ): for path, col in self.tableColumns.items(): diff --git a/tests/benchmarks/tnl-cuda-benchmarks.h b/tests/benchmarks/tnl-cuda-benchmarks.h index 7d5f562c8729d2d8cc2d61943fd242756522874f..fa9f5bf9dd561b41d08a84d827e393c393aa7408 100644 --- a/tests/benchmarks/tnl-cuda-benchmarks.h +++ b/tests/benchmarks/tnl-cuda-benchmarks.h @@ -18,6 +18,8 @@ #ifndef TNLCUDABENCHMARKS_H_ #define TNLCUDBENCHMARKS_H_ +#include <config/tnlConfigDescription.h> +#include <config/tnlParameterContainer.h> #include <core/tnlList.h> #include <matrices/tnlCSRMatrix.h> #include <matrices/tnlEllpackMatrix.h> @@ -54,7 +56,7 @@ int setHostTestMatrix( Matrix& matrix, matrix.setElement( row, col + element, element + 1 ); elements++; } - } + } } return elements; } @@ -67,13 +69,13 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix, const int rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= matrix->getRows() ) return; - int col = rowIdx - elementsPerRow / 2; + int col = rowIdx - elementsPerRow / 2; for( int 
element = 0; element < elementsPerRow; element++ ) { if( col + element >= 0 && col + element < matrix->getColumns() ) matrix->setElementFast( rowIdx, col + element, element + 1 ); - } + } } template< typename Matrix > @@ -186,79 +188,115 @@ benchmarkSpMV( Benchmark & benchmark, return true; } -int main( int argc, char* argv[] ) +template< typename Real > +void +runCudaBenchmarks( Benchmark & benchmark, + Benchmark::MetadataMap metadata, + const unsigned & minSize, + const unsigned & maxSize, + const unsigned & loops, + const unsigned & elementsPerRow ) +{ + const tnlString precision = getType< Real >(); + metadata["precision"] = precision; + + // Array operations + benchmark.newBenchmark( tnlString("Array operations (") + precision + ")", + metadata ); + for( unsigned size = minSize; size <= maxSize; size *= 2 ) { + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + {"size", size}, + } )); + benchmarkArrayOperations< Real >( benchmark, loops, size ); + } + + // Vector operations + benchmark.newBenchmark( tnlString("Vector operations (") + precision + ")", + metadata ); + for( unsigned size = minSize; size <= maxSize; size *= 2 ) { + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + {"size", size}, + } )); + benchmarkVectorOperations< Real >( benchmark, loops, size ); + } + + // Sparse matrix-vector multiplication + benchmark.newBenchmark( tnlString("Sparse matrix-vector multiplication (") + precision + ")", + metadata ); + for( unsigned size = minSize; size <= maxSize; size *= 2 ) { + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + {"rows", size}, + {"columns", size}, + {"elements per row", elementsPerRow}, + } )); + + // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) + benchmarkSpMV< Real, tnlCSRMatrix >( benchmark, loops, size, elementsPerRow ); + benchmarkSpMV< Real, tnlEllpackMatrix >( benchmark, loops, size, elementsPerRow ); + benchmarkSpMV< Real, SlicedEllpackMatrix >( 
benchmark, loops, size, elementsPerRow ); + benchmarkSpMV< Real, tnlChunkedEllpackMatrix >( benchmark, loops, size, elementsPerRow ); + } +} + +void +setupConfig( tnlConfigDescription & config ) +{ + config.addDelimiter( "Benchmark settings:" ); + config.addEntry< tnlString >( "log-file", "Log file name.", "tnl-cuda-benchmarks.log"); + config.addEntry< tnlString >( "precision", "Precision of the arithmetics.", "double" ); + config.addEntryEnum( "float" ); + config.addEntryEnum( "double" ); + config.addEntryEnum( "all" ); + config.addEntry< int >( "min-size", "Minimum size of arrays/vectors used in the benchmark (next size is 2*min-size and so on, up to max-size).", 100000 ); + config.addEntry< int >( "max-size", "Maximum size of arrays/vectors used in the benchmark (next size is 2*min-size and so on, up to max-size).", 10000000 ); + config.addEntry< int >( "loops", "Number of iterations for every computation.", 1.0 ); + config.addEntry< int >( "elements-per-row", "Number of elements per row of the sparse matrix used in the matrix-vector multiplication benchmark.", 5 ); + config.addEntry< int >( "verbose", "Verbose mode.", 1 ); +} + +int +main( int argc, char* argv[] ) { #ifdef HAVE_CUDA - - typedef double Real; - tnlString precision = getType< Real >(); - - /**** - * The first argument of this program is the size od data set to be reduced. - * If no argument is given we use hardcoded default value. - */ - int size = 1 << 22; - if( argc > 1 ) - size = atoi( argv[ 1 ] ); - int loops = 10; - if( argc > 2 ) - loops = atoi( argv[ 2 ] ); - int elementsPerRow = 5; - if( argc > 3 ) - elementsPerRow = atoi( argv[ 3 ] ); - - ofstream logFile( "tnl-cuda-benchmarks.log" ); - Benchmark benchmark( loops, true ); -// ostream & logFile = cout; -// Benchmark benchmark( loops, false ); - - // TODO: add hostname, CPU info, GPU info, date, ... 
- Benchmark::MetadataMap metadata { - {"precision", precision}, - }; - // TODO: loop over sizes - - - // Array operations - benchmark.newBenchmark( tnlString("Array operations (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - {"size", size}, - } )); - benchmarkArrayOperations< Real >( benchmark, loops, size ); - - // Vector operations - benchmark.newBenchmark( tnlString("Vector operations (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - {"size", size}, - } )); - benchmarkVectorOperations< Real >( benchmark, loops, size ); - - - // Sparse matrix-vector multiplication - benchmark.newBenchmark( tnlString("Sparse matrix-vector multiplication (") + precision + ")", - metadata ); - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - {"rows", size}, - {"columns", size}, - {"elements per row", elementsPerRow}, - } )); - - // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats) - benchmarkSpMV< Real, tnlCSRMatrix >( benchmark, loops, size, elementsPerRow ); - benchmarkSpMV< Real, tnlEllpackMatrix >( benchmark, loops, size, elementsPerRow ); - benchmarkSpMV< Real, SlicedEllpackMatrix >( benchmark, loops, size, elementsPerRow ); - benchmarkSpMV< Real, tnlChunkedEllpackMatrix >( benchmark, loops, size, elementsPerRow ); - - - if( ! benchmark.save( logFile ) ) - return EXIT_FAILURE; - - return EXIT_SUCCESS; + tnlParameterContainer parameters; + tnlConfigDescription conf_desc; + + setupConfig( conf_desc ); + + if( ! 
parseCommandLine( argc, argv, conf_desc, parameters ) ) { + conf_desc.printUsage( argv[ 0 ] ); + return 1; + } + + ofstream logFile( parameters.getParameter< tnlString >( "log-file" ).getString() ); + const tnlString & precision = parameters.getParameter< tnlString >( "precision" ); + const unsigned minSize = parameters.getParameter< unsigned >( "min-size" ); + const unsigned maxSize = parameters.getParameter< unsigned >( "max-size" ); + const unsigned loops = parameters.getParameter< unsigned >( "loops" ); + const unsigned elementsPerRow = parameters.getParameter< unsigned >( "elements-per-row" ); + const unsigned verbose = parameters.getParameter< unsigned >( "verbose" ); + + // init benchmark and common metadata + Benchmark benchmark( loops, verbose ); + // TODO: add hostname, CPU info, GPU info, date, ... + Benchmark::MetadataMap metadata { +// {"key", value}, + }; + + if( precision == "all" || precision == "float" ) + runCudaBenchmarks< float >( benchmark, metadata, minSize, maxSize, loops, elementsPerRow ); + if( precision == "all" || precision == "double" ) + runCudaBenchmarks< double >( benchmark, metadata, minSize, maxSize, loops, elementsPerRow ); + + if( ! benchmark.save( logFile ) ) { + cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< tnlString >( "log-file" ) << "'." << endl; + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; #else - tnlCudaSupportMissingMessage; - return EXIT_FAILURE; + tnlCudaSupportMissingMessage; + return EXIT_FAILURE; #endif }