From 7611149633cbaca282c824937ce6cc332a7815a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz> Date: Wed, 12 Sep 2018 10:24:09 +0200 Subject: [PATCH] Added tnl-benchmark-distributed-spmv --- src/Benchmarks/CMakeLists.txt | 1 + src/Benchmarks/DistSpMV/CMakeLists.txt | 11 + src/Benchmarks/DistSpMV/ordering.h | 133 ++++++ .../tnl-benchmark-distributed-spmv.cpp | 11 + .../tnl-benchmark-distributed-spmv.cu | 11 + .../DistSpMV/tnl-benchmark-distributed-spmv.h | 395 ++++++++++++++++++ 6 files changed, 562 insertions(+) create mode 100644 src/Benchmarks/DistSpMV/CMakeLists.txt create mode 100644 src/Benchmarks/DistSpMV/ordering.h create mode 100644 src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp create mode 100644 src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu create mode 100644 src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h diff --git a/src/Benchmarks/CMakeLists.txt b/src/Benchmarks/CMakeLists.txt index e34ade5be3..e0637205f7 100644 --- a/src/Benchmarks/CMakeLists.txt +++ b/src/Benchmarks/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory( HeatEquation ) add_subdirectory( BLAS ) add_subdirectory( SpMV ) +add_subdirectory( DistSpMV ) add_subdirectory( LinearSolvers ) set( headers diff --git a/src/Benchmarks/DistSpMV/CMakeLists.txt b/src/Benchmarks/DistSpMV/CMakeLists.txt new file mode 100644 index 0000000000..57ccdd7a9f --- /dev/null +++ b/src/Benchmarks/DistSpMV/CMakeLists.txt @@ -0,0 +1,11 @@ +if( BUILD_CUDA ) + cuda_add_executable( tnl-benchmark-distributed-spmv-cuda tnl-benchmark-distributed-spmv.cu ) + target_link_libraries( tnl-benchmark-distributed-spmv-cuda tnl ) + + install( TARGETS tnl-benchmark-distributed-spmv-cuda RUNTIME DESTINATION bin ) +endif() + +add_executable( tnl-benchmark-distributed-spmv tnl-benchmark-distributed-spmv.cpp ) +target_link_libraries( tnl-benchmark-distributed-spmv tnl ) + +install( TARGETS tnl-benchmark-distributed-spmv RUNTIME DESTINATION bin ) diff --git a/src/Benchmarks/DistSpMV/ordering.h b/src/Benchmarks/DistSpMV/ordering.h new file mode 100644 index 0000000000..5bd68a95bf --- /dev/null +++ b/src/Benchmarks/DistSpMV/ordering.h @@ -0,0 +1,133 @@ +#pragma once + +#include <algorithm> + +#include <TNL/Devices/Host.h> +#include <TNL/ParallelFor.h> + +using namespace TNL; + +template< typename Matrix, typename PermutationVector > +void +getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVector& iperm ) +{ + using IndexType = typename Matrix::IndexType; + + // allocate permutation vectors + perm.setSize( matrix.getRows() ); + iperm.setSize( matrix.getRows() ); + + const IndexType N = matrix.getRows() / 2; + for( IndexType i = 0; i < N; i++ ) { + perm[ 2 * i ] = i; + perm[ 2 * i + 1 ] = i + N; + iperm[ i ] = 2 * i; + iperm[ i + N ] = 2 * i + 1; + } +} + +template< typename Vector, typename PermutationVector > +void +reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ) +{ + TNL_ASSERT_EQ( src.getSize(), perm.getSize(), + "Source vector and permutation must have the same size." 
); + using RealType = typename Vector::RealType; + using DeviceType = typename Vector::DeviceType; + using IndexType = typename Vector::IndexType; + + auto kernel = [] __cuda_callable__ + ( IndexType i, + const RealType* src, + RealType* dest, + const typename PermutationVector::RealType* perm ) + { + dest[ i ] = src[ perm[ i ] ]; + }; + + dest.setLike( src ); + + ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), + kernel, + src.getData(), + dest.getData(), + perm.getData() ); +} + +template< typename Matrix, typename PermutationVector > +void +reorderMatrix( const Matrix& matrix1, Matrix& matrix2, const PermutationVector& _perm, const PermutationVector& _iperm ) +{ + // TODO: implement on GPU + static_assert( std::is_same< typename Matrix::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); + static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); + + using namespace TNL; + using IndexType = typename Matrix::IndexType; + + matrix2.setLike( matrix1 ); + + // general multidimensional accessors for permutation indices + // TODO: this depends on the specific layout of dofs, general reordering of NDArray is needed + auto perm = [&]( IndexType dof ) { + TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" ); + const IndexType i = dof / _perm.getSize(); + return i * _perm.getSize() + _perm[ dof % _perm.getSize() ]; + }; + auto iperm = [&]( IndexType dof ) { + TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" ); + const IndexType i = dof / _iperm.getSize(); + return i * _iperm.getSize() + _iperm[ dof % _iperm.getSize() ]; + }; + + // set row lengths + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( matrix1.getRows() ); + for( IndexType i = 0; i < matrix1.getRows(); i++ ) { + const IndexType maxLength = matrix1.getRowLength( perm( i ) ); + const auto row = matrix1.getRow( perm( i ) ); + IndexType length = 0; + for( IndexType j = 0; j < maxLength; j++ ) + if( row.getElementColumn( j ) < matrix1.getColumns() ) + length++; + rowLengths[ i ] = length; + } + matrix2.setCompressedRowLengths( rowLengths ); + + // set row elements + for( IndexType i = 0; i < matrix2.getRows(); i++ ) { + const IndexType rowLength = rowLengths[ i ]; + + // extract sparse row + const auto row1 = matrix1.getRow( perm( i ) ); + + // permute + typename Matrix::IndexType columns[ rowLength ]; + typename Matrix::RealType values[ rowLength ]; + for( IndexType j = 0; j < rowLength; j++ ) { + columns[ j ] = iperm( row1.getElementColumn( j ) ); + values[ j ] = row1.getElementValue( j ); + } + + // sort + IndexType indices[ rowLength ]; + for( IndexType j = 0; j < rowLength; j++ ) + indices[ j ] = j; + // nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?) 
+ // error: a variable captured by a lambda cannot have a type involving a variable-length array + IndexType* _columns = columns; + auto comparator = [=]( IndexType a, IndexType b ) { + return _columns[ a ] < _columns[ b ]; + }; + std::sort( indices, indices + rowLength, comparator ); + + typename Matrix::IndexType sortedColumns[ rowLength ]; + typename Matrix::RealType sortedValues[ rowLength ]; + for( IndexType j = 0; j < rowLength; j++ ) { + sortedColumns[ j ] = columns[ indices[ j ] ]; + sortedValues[ j ] = values[ indices[ j ] ]; + } + + matrix2.setRow( i, sortedColumns, sortedValues, rowLength ); + } +} diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp new file mode 100644 index 0000000000..63c02eab46 --- /dev/null +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + tnl-benchmark-distributed-spmv.cpp - description + ------------------- + begin : Sep 11, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "tnl-benchmark-distributed-spmv.h" diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu new file mode 100644 index 0000000000..7e9094fbbc --- /dev/null +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + tnl-benchmark-distributed-spmv.cu - description + ------------------- + begin : Sep 11, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "tnl-benchmark-distributed-spmv.h" diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h new file mode 100644 index 0000000000..1aed36ec8f --- /dev/null +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h @@ -0,0 +1,395 @@ +/*************************************************************************** + tnl-benchmark-distributed-spmv.h - description + ------------------- + begin : Sep 11, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// Implemented by: Jakub Klinkovsky + +#pragma once + +#ifndef NDEBUG +#include <TNL/Debugging/FPE.h> +#endif + +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> +#include <TNL/Devices/SystemInfo.h> +#include <TNL/Devices/CudaDeviceInfo.h> +#include <TNL/Config/ConfigDescription.h> +#include <TNL/Config/ParameterContainer.h> +#include <TNL/Communicators/MpiCommunicator.h> +#include <TNL/Communicators/NoDistrCommunicator.h> +#include <TNL/Communicators/ScopedInitializer.h> +#include <TNL/DistributedContainers/Partitioner.h> +#include <TNL/DistributedContainers/DistributedVector.h> +#include <TNL/DistributedContainers/DistributedMatrix.h> + +#include "../Benchmarks.h" +#include "ordering.h" + +#include <TNL/Matrices/SlicedEllpack.h> + +using namespace TNL; +using namespace TNL::Benchmarks; + +#ifdef HAVE_MPI +using CommunicatorType = Communicators::MpiCommunicator; +#else +using CommunicatorType = Communicators::NoDistrCommunicator; +#endif + + +template< typename Matrix, typename Vector > +void +benchmarkSpmv( Benchmark& benchmark, + const Matrix& matrix, + const Vector& x, + const char* performer = "CPU" ) +{ + Vector y; + y.setLike( x ); + + // reset function + auto reset = [&]() { + y = x; + }; + + // benchmark function + auto compute = [&]() { + matrix.vectorProduct( x, y ); + }; + + benchmark.time( reset, performer, compute ); +} + +template< typename Matrix, typename Vector > +void +benchmarkSpmvCuda( Benchmark& benchmark, + const Matrix& matrix, + const Vector& x ) +{ + using RealType = typename Matrix::RealType; + using IndexType = typename Matrix::IndexType; + using CudaMatrix = typename Matrix::CudaType; + using CudaVector = typename Vector::CudaType; + + CudaVector cuda_x; + cuda_x = x; + + Timer t; + t.start(); + + CudaMatrix cuda_matrix; + cuda_matrix = matrix; + + t.stop(); + std::cout << "--> Copying the matrix to the GPU took " << t.getRealTime() << " seconds." << std::endl; + + benchmarkSpmv( benchmark, cuda_matrix, cuda_x, "GPU" ); +} + +template< typename Matrix, typename Vector > +void +benchmarkDistributedSpmv( Benchmark& benchmark, + // TODO: cannot be const due to internal buffering +// const Matrix& matrix, + Matrix& matrix, + const Vector& x, + const char* performer = "CPU" ) +{ + Vector y; + y.setLike( x ); + + // reset function + auto reset = [&]() { + y = x; + }; + + // benchmark function + auto compute = [&]() { + matrix.vectorProduct( x, y ); + Matrix::CommunicatorType::Barrier( matrix.getCommunicationGroup() ); + }; + + benchmark.time( reset, performer, compute ); +} + +template< typename Matrix, typename Vector > +void +benchmarkDistributedSpmvCuda( Benchmark& benchmark, + const Matrix& matrix, + const Vector& x ) +{ + using RealType = typename Matrix::RealType; + using IndexType = typename Matrix::IndexType; + using CudaMatrix = typename Matrix::CudaType; + using CudaVector = typename Vector::CudaType; + + CudaVector cuda_x; + cuda_x = x; + + Timer t; + t.start(); + + CudaMatrix cuda_matrix; + cuda_matrix = matrix; + + t.stop(); + std::cout << "--> Copying the matrix to the GPU took " << t.getRealTime() << " seconds." 
<< std::endl; + + benchmarkDistributedSpmv( benchmark, cuda_matrix, cuda_x, "GPU" ); +} + +template< typename MatrixType > +struct SpmvBenchmark +{ + using RealType = typename MatrixType::RealType; + using DeviceType = typename MatrixType::DeviceType; + using IndexType = typename MatrixType::IndexType; + using VectorType = Containers::Vector< RealType, DeviceType, IndexType >; + + using Partitioner = DistributedContainers::Partitioner< IndexType, CommunicatorType >; + using DistributedMatrix = DistributedContainers::DistributedMatrix< MatrixType, CommunicatorType >; + using DistributedVector = DistributedContainers::DistributedVector< RealType, DeviceType, IndexType, CommunicatorType >; + using DistributedRowLengths = typename DistributedMatrix::CompressedRowLengthsVector; + + static bool + run( Benchmark& benchmark, + Benchmark::MetadataMap metadata, + const Config::ParameterContainer& parameters ) + { + MatrixType matrix; + VectorType vector; + if( ! matrix.load( parameters.getParameter< String >( "input-matrix" ) ) || + ! vector.load( parameters.getParameter< String >( "input-vector" ) ) ) + return false; + + typename MatrixType::CompressedRowLengthsVector rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + const IndexType maxRowLength = rowLengths.max(); + + const String name = String( (CommunicatorType::isDistributed()) ? "DistSpMV" : "SpMV" ) + + " (" + parameters.getParameter< String >( "name" ) + "): "; + benchmark.newBenchmark( name, metadata ); + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + // TODO: strip the device +// {"matrix type", matrix.getType()}, + {"rows", matrix.getRows()}, + {"columns", matrix.getColumns()}, + // FIXME: getMaxRowLengths() returns 0 for matrices loaded from file +// {"max elements per row", matrix.getMaxRowLength()}, + {"max elements per row", maxRowLength}, + } )); + + const bool reorder = parameters.getParameter< bool >( "reorder-dofs" ); + if( reorder ) { + using PermutationVector = Containers::Vector< IndexType, DeviceType, IndexType >; + PermutationVector perm, iperm; + getTrivialOrdering( matrix, perm, iperm ); + MatrixType matrix_perm; + reorderMatrix( matrix, matrix_perm, perm, iperm ); + if( CommunicatorType::isDistributed() ) + runDistributed( benchmark, metadata, parameters, matrix_perm, vector ); + else + runNonDistributed( benchmark, metadata, parameters, matrix_perm, vector ); + } + else { + if( CommunicatorType::isDistributed() ) + runDistributed( benchmark, metadata, parameters, matrix, vector ); + else + runNonDistributed( benchmark, metadata, parameters, matrix, vector ); + } + + return true; + } + + static void + runNonDistributed( Benchmark& benchmark, + Benchmark::MetadataMap metadata, + const Config::ParameterContainer& parameters, + MatrixType& matrix, + VectorType& vector ) + { + benchmarkSpmv( benchmark, matrix, vector ); +#ifdef HAVE_CUDA + benchmarkSpmvCuda( benchmark, matrix, vector ); +#endif + } + + static void + runDistributed( Benchmark& benchmark, + Benchmark::MetadataMap metadata, + const Config::ParameterContainer& parameters, + MatrixType& matrix, + VectorType& vector ) + { + // set up the distributed matrix + const auto group = CommunicatorType::AllGroup; + const auto localRange = Partitioner::splitRange( matrix.getRows(), group ); + DistributedMatrix distributedMatrix( localRange, matrix.getRows(), matrix.getColumns(), group ); + DistributedVector distributedVector( localRange, matrix.getRows(), group ); + + // copy the row lengths from the global matrix to the distributed matrix + 
DistributedRowLengths distributedRowLengths( localRange, matrix.getRows(), group ); + for( IndexType i = 0; i < distributedMatrix.getLocalMatrix().getRows(); i++ ) { + const auto gi = distributedMatrix.getLocalRowRange().getGlobalIndex( i ); + distributedRowLengths[ gi ] = matrix.getRowLength( gi ); + } + distributedMatrix.setCompressedRowLengths( distributedRowLengths ); + + // copy data from the global matrix/vector into the distributed matrix/vector + for( IndexType i = 0; i < distributedMatrix.getLocalMatrix().getRows(); i++ ) { + const auto gi = distributedMatrix.getLocalRowRange().getGlobalIndex( i ); + distributedVector[ gi ] = vector[ gi ]; + + const IndexType rowLength = matrix.getRowLength( i ); + IndexType columns[ rowLength ]; + RealType values[ rowLength ]; + matrix.getRowFast( gi, columns, values ); + distributedMatrix.setRowFast( gi, columns, values, rowLength ); + } + + benchmarkDistributedSpmv( benchmark, distributedMatrix, distributedVector ); +#ifdef HAVE_CUDA + benchmarkDistributedSpmvCuda( benchmark, distributedMatrix, distributedVector ); +#endif + +#ifndef NDEBUG + // compare results of normal and distributed spmv + VectorType y; + y.setLike( vector ); + matrix.vectorProduct( vector, y ); + DistributedVector distributedY; + distributedY.setLike( distributedVector ); + distributedMatrix.vectorProduct( distributedVector, distributedY ); + const int rank = CommunicatorType::GetRank( distributedMatrix.getCommunicationGroup() ); + const int nproc = CommunicatorType::GetSize( distributedMatrix.getCommunicationGroup() ); + VectorType subY( y, + Partitioner::getOffset( matrix.getRows(), rank, nproc ), + Partitioner::getSizeForRank( matrix.getRows(), rank, nproc ) ); + TNL_ASSERT_EQ( distributedY.getLocalVectorView(), subY, "WRONG RESULT !!!" ); +#endif + } +}; + +void +configSetup( Config::ConfigDescription & config ) +{ + config.addDelimiter( "Benchmark settings:" ); + config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-distributed-spmv.log"); + config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" ); + config.addEntryEnum( "append" ); + config.addEntryEnum( "overwrite" ); + config.addEntry< int >( "loops", "Number of repetitions of the benchmark.", 10 ); + config.addRequiredEntry< String >( "input-matrix", "File name of the input matrix (in binary TNL format)." ); + config.addRequiredEntry< String >( "input-vector", "File name of the input vector (in binary TNL format)." ); + config.addEntry< String >( "name", "Name of the matrix in the benchmark.", "" ); + config.addEntry< int >( "verbose", "Verbose mode.", 1 ); + config.addEntry< bool >( "reorder-dofs", "Reorder matrix entries corresponding to the same DOF together.", false ); + + config.addDelimiter( "Device settings:" ); + Devices::Host::configSetup( config ); + Devices::Cuda::configSetup( config ); + CommunicatorType::configSetup( config ); +} + +int +main( int argc, char* argv[] ) +{ +#ifndef NDEBUG + Debugging::trackFloatingPointExceptions(); +#endif + + Config::ParameterContainer parameters; + Config::ConfigDescription conf_desc; + + configSetup( conf_desc ); + + Communicators::ScopedInitializer< CommunicatorType > scopedInit(argc, argv); + const int rank = CommunicatorType::GetRank( CommunicatorType::AllGroup ); + + if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) { + conf_desc.printUsage( argv[ 0 ] ); + return EXIT_FAILURE; + } + + if( ! Devices::Host::setup( parameters ) || + ! Devices::Cuda::setup( parameters ) || + ! 
CommunicatorType::setup( parameters ) ) + return EXIT_FAILURE; + + const String & logFileName = parameters.getParameter< String >( "log-file" ); + const String & outputMode = parameters.getParameter< String >( "output-mode" ); + const unsigned loops = parameters.getParameter< unsigned >( "loops" ); + const unsigned verbose = (rank == 0) ? parameters.getParameter< unsigned >( "verbose" ) : 0; + + // open log file + auto mode = std::ios::out; + if( outputMode == "append" ) + mode |= std::ios::app; + std::ofstream logFile; + if( rank == 0 ) + logFile.open( logFileName.getString(), mode ); + + // init benchmark and common metadata + Benchmark benchmark( loops, verbose ); + + // prepare global metadata + const int cpu_id = 0; + Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); + String cacheInfo = String( cacheSizes.L1data ) + ", " + + String( cacheSizes.L1instruction ) + ", " + + String( cacheSizes.L2 ) + ", " + + String( cacheSizes.L3 ); +#ifdef HAVE_CUDA + const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); + const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); +#endif + Benchmark::MetadataMap metadata { + { "host name", Devices::SystemInfo::getHostname() }, + { "architecture", Devices::SystemInfo::getArchitecture() }, + { "system", Devices::SystemInfo::getSystemName() }, + { "system release", Devices::SystemInfo::getSystemRelease() }, + { "start time", Devices::SystemInfo::getCurrentTime() }, +#ifdef HAVE_MPI + { "number of MPI processes", CommunicatorType::GetSize( CommunicatorType::AllGroup ) }, +#endif + { "OpenMP enabled", Devices::Host::isOMPEnabled() }, + { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, + { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, + { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, + { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, + { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, + { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, +#ifdef HAVE_CUDA + { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, + { "GPU architecture", deviceArch }, + { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, + { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, + { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, + { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, + { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, +#endif + }; + + // TODO: implement resolveMatrixType +// return ! Matrices::resolveMatrixType< MainConfig, +// Devices::Host, +// SpmvBenchmark >( benchmark, metadata, parameters ); + using MatrixType = Matrices::SlicedEllpack< double, Devices::Host, int >; + const bool status = SpmvBenchmark< MatrixType >::run( benchmark, metadata, parameters ); + + if( rank == 0 ) + if( ! benchmark.save( logFile ) ) { + std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; + return EXIT_FAILURE; + } + + return ! status; +} -- GitLab
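
Note on the patch above (illustrative sketch, not part of the commit): the permutation
built by getTrivialOrdering() in ordering.h maps a *new* row index to an *old* one
(perm), with iperm as its inverse, which is exactly how reorderMatrix() consumes them:
new row i is filled from old row perm(i), and old column indices are remapped through
iperm. For a block-wise DOF layout [u_0..u_{N-1}, v_0..v_{N-1}] this interleaves the two
fields as [u_0, v_0, u_1, v_1, ...], which matches the "reorder-dofs" option description
("reorder matrix entries corresponding to the same DOF together"). A minimal, TNL-free
sketch of the same index arithmetic, assuming a hypothetical problem with 8 DOFs, is:

   #include <cassert>
   #include <cstdio>
   #include <vector>

   int main()
   {
      const int rows = 8;             // hypothetical example size; must be even
      const int N = rows / 2;
      std::vector< int > perm( rows ), iperm( rows );

      // same index arithmetic as getTrivialOrdering() in ordering.h
      for( int i = 0; i < N; i++ ) {
         perm[ 2 * i ]     = i;       // new row 2*i   <- old row i       (first field)
         perm[ 2 * i + 1 ] = i + N;   // new row 2*i+1 <- old row i + N   (second field)
         iperm[ i ]     = 2 * i;      // old row i     -> new row 2*i
         iperm[ i + N ] = 2 * i + 1;  // old row i + N -> new row 2*i+1
      }

      // the two permutations are mutually inverse
      for( int i = 0; i < rows; i++ )
         assert( iperm[ perm[ i ] ] == i );

      // block ordering [u0..u3, v0..v3] becomes interleaved [u0, v0, u1, v1, ...]
      for( int i = 0; i < rows; i++ )
         std::printf( "new row %d <- old row %d\n", i, perm[ i ] );
      return 0;
   }

For reference, the benchmark is driven by the options declared in configSetup();
with MPI enabled the distributed variant would typically be launched along the lines of
"mpirun -np 4 tnl-benchmark-distributed-spmv --input-matrix <matrix file>
--input-vector <vector file>" (file names are placeholders), and the CUDA build
installs a separate tnl-benchmark-distributed-spmv-cuda executable.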