Loading src/Benchmarks/BLAS/array-operations.h +2 −3 Original line number Diff line number Diff line Loading @@ -102,9 +102,8 @@ benchmarkArrayOperations( Benchmark & benchmark, }; #ifdef HAVE_CUDA benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime ); benchmark.time( reset1, "CPU->GPU", copyAssignHostCuda, "GPU->CPU", copyAssignCudaHost ); benchmark.time( reset1, "CPU->GPU", copyAssignHostCuda ); benchmark.time( reset1, "GPU->CPU", copyAssignCudaHost ); #endif Loading src/Benchmarks/BLAS/tnl-benchmark-blas.h +2 −37 Original line number Diff line number Diff line Loading @@ -13,8 +13,7 @@ #pragma once #include <TNL/Devices/Host.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/Cuda.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> Loading @@ -26,9 +25,6 @@ using namespace TNL; using namespace TNL::Benchmarks; // TODO: should benchmarks check the result of the computation? template< typename Real > void runBlasBenchmarks( Benchmark & benchmark, Loading Loading @@ -146,38 +142,7 @@ main( int argc, char* argv[] ) Benchmark benchmark( loops, verbose ); // prepare global metadata const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; Benchmark::MetadataMap metadata = getHardwareMetadata(); if( precision == "all" || precision == "float" ) runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow ); Loading src/Benchmarks/Benchmarks.h +104 −29 Original line number Diff line number Diff line Loading @@ -16,11 +16,18 @@ #include <iomanip> #include <map> #include <vector> #include <exception> #include <limits> #include <TNL/Timer.h> #include <TNL/String.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Communicators/MpiCommunicator.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -64,7 +71,7 @@ timeFunction( ComputeFunction compute, timer.stop(); } return timer.getRealTime(); return timer.getRealTime() / loops; } Loading @@ -75,8 +82,8 @@ public: using MetadataMap = std::map< const char*, String >; using MetadataColumns = std::vector<MetadataElement>; using HeaderElements = std::initializer_list< String >; using RowElements = std::initializer_list< double >; using HeaderElements = std::vector< String >; using RowElements = std::vector< double >; Logging( bool verbose = true ) : verbose(verbose) Loading Loading @@ -109,8 +116,6 @@ public: writeTableHeader( const String & spanningElement, const HeaderElements & subElements ) { using namespace std; if( verbose && header_changed ) { for( auto & it : metadataColumns ) { std::cout << std::setw( 20 ) << it.first; Loading Loading @@ -163,8 +168,6 @@ public: writeTableRow( const String & spanningElement, const RowElements & subElements ) { using namespace std; if( verbose ) { for( auto & it : metadataColumns ) { std::cout << std::setw( 20 ) << it.second; Loading Loading @@ -278,6 +281,27 @@ protected: }; struct BenchmarkResult { using HeaderElements = Logging::HeaderElements; using RowElements = Logging::RowElements; double bandwidth = std::numeric_limits<double>::quiet_NaN(); double time = std::numeric_limits<double>::quiet_NaN(); double speedup = std::numeric_limits<double>::quiet_NaN(); virtual HeaderElements getTableHeader() const { return HeaderElements({"bandwidth", "time", "speedup"}); } virtual RowElements getRowElements() const { return RowElements({ bandwidth, time, speedup }); } }; class Benchmark : protected Logging { Loading Loading @@ -305,7 +329,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); } // Marks the start of a new benchmark (with custom metadata) Loading @@ -315,7 +338,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); // add loops to metadata metadata["loops"] = String(loops); writeMetadata( metadata ); Loading @@ -342,6 +364,7 @@ public: const double datasetSize = 0.0, // in GB const double baseTime = 0.0 ) { monitor.setStage( operation.getString() ); if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) { metadataColumns[ 0 ].second = operation; } Loading Loading @@ -393,43 +416,45 @@ public: double time( ResetFunction reset, const String & performer, ComputeFunction & compute ) ComputeFunction & compute, BenchmarkResult & result ) { double time; result.time = std::numeric_limits<double>::quiet_NaN(); try { if( verbose ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); time = timeFunction( compute, reset, loops, monitor ); result.time = timeFunction( compute, reset, loops, monitor ); } else { time = timeFunction( compute, reset, loops, monitor ); result.time = timeFunction( compute, reset, loops, monitor ); } } catch ( const std::exception& e ) { std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl; } const double bandwidth = datasetSize / time; const double speedup = this->baseTime / time; result.bandwidth = datasetSize / result.time; result.speedup = this->baseTime / result.time; if( this->baseTime == 0.0 ) this->baseTime = time; this->baseTime = result.time; writeTableHeader( performer, HeaderElements({"bandwidth", "time", "speedup"}) ); writeTableRow( performer, RowElements({ bandwidth, time, speedup }) ); writeTableHeader( performer, result.getTableHeader() ); writeTableRow( performer, result.getRowElements() ); return this->baseTime; } // Recursive template function to deal with multiple computations with the // same reset function. template< typename ResetFunction, typename ComputeFunction, typename... NextComputations > inline double time( ResetFunction reset, const String & performer, ComputeFunction & compute, NextComputations & ... nextComputations ) ComputeFunction & compute ) { time( reset, performer, compute ); time( reset, nextComputations... ); return this->baseTime; BenchmarkResult result; return time( reset, performer, compute, result ); } // Adds an error message to the log. Should be called in places where the Loading @@ -445,6 +470,12 @@ public: using Logging::save; Solvers::IterativeSolverMonitor< double, int >& getMonitor() { return monitor; } protected: int loops; double datasetSize = 0.0; Loading @@ -452,5 +483,49 @@ protected: Solvers::IterativeSolverMonitor< double, int > monitor; }; Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) }, #endif { "OpenMP enabled", Devices::Host::isOMPEnabled() }, { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL src/Benchmarks/CMakeLists.txt +1 −0 Original line number Diff line number Diff line add_subdirectory( HeatEquation ) add_subdirectory( BLAS ) add_subdirectory( SpMV ) add_subdirectory( DistSpMV ) add_subdirectory( LinearSolvers ) set( headers Loading src/Benchmarks/DistSpMV/CMakeLists.txt 0 → 100644 +11 −0 Original line number Diff line number Diff line if( BUILD_CUDA ) cuda_add_executable( tnl-benchmark-distributed-spmv-cuda tnl-benchmark-distributed-spmv.cu ) target_link_libraries( tnl-benchmark-distributed-spmv-cuda tnl ) install( TARGETS tnl-benchmark-distributed-spmv-cuda RUNTIME DESTINATION bin ) endif() add_executable( tnl-benchmark-distributed-spmv tnl-benchmark-distributed-spmv.cpp ) target_link_libraries( tnl-benchmark-distributed-spmv tnl ) install( TARGETS tnl-benchmark-distributed-spmv RUNTIME DESTINATION bin ) Loading
src/Benchmarks/BLAS/array-operations.h +2 −3 Original line number Diff line number Diff line Loading @@ -102,9 +102,8 @@ benchmarkArrayOperations( Benchmark & benchmark, }; #ifdef HAVE_CUDA benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime ); benchmark.time( reset1, "CPU->GPU", copyAssignHostCuda, "GPU->CPU", copyAssignCudaHost ); benchmark.time( reset1, "CPU->GPU", copyAssignHostCuda ); benchmark.time( reset1, "GPU->CPU", copyAssignCudaHost ); #endif Loading
src/Benchmarks/BLAS/tnl-benchmark-blas.h +2 −37 Original line number Diff line number Diff line Loading @@ -13,8 +13,7 @@ #pragma once #include <TNL/Devices/Host.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/Cuda.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> Loading @@ -26,9 +25,6 @@ using namespace TNL; using namespace TNL::Benchmarks; // TODO: should benchmarks check the result of the computation? template< typename Real > void runBlasBenchmarks( Benchmark & benchmark, Loading Loading @@ -146,38 +142,7 @@ main( int argc, char* argv[] ) Benchmark benchmark( loops, verbose ); // prepare global metadata const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; Benchmark::MetadataMap metadata = getHardwareMetadata(); if( precision == "all" || precision == "float" ) runBlasBenchmarks< float >( benchmark, metadata, minSize, maxSize, sizeStepFactor, loops, elementsPerRow ); Loading
src/Benchmarks/Benchmarks.h +104 −29 Original line number Diff line number Diff line Loading @@ -16,11 +16,18 @@ #include <iomanip> #include <map> #include <vector> #include <exception> #include <limits> #include <TNL/Timer.h> #include <TNL/String.h> #include <TNL/Solvers/IterativeSolverMonitor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/SystemInfo.h> #include <TNL/Devices/CudaDeviceInfo.h> #include <TNL/Communicators/MpiCommunicator.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -64,7 +71,7 @@ timeFunction( ComputeFunction compute, timer.stop(); } return timer.getRealTime(); return timer.getRealTime() / loops; } Loading @@ -75,8 +82,8 @@ public: using MetadataMap = std::map< const char*, String >; using MetadataColumns = std::vector<MetadataElement>; using HeaderElements = std::initializer_list< String >; using RowElements = std::initializer_list< double >; using HeaderElements = std::vector< String >; using RowElements = std::vector< double >; Logging( bool verbose = true ) : verbose(verbose) Loading Loading @@ -109,8 +116,6 @@ public: writeTableHeader( const String & spanningElement, const HeaderElements & subElements ) { using namespace std; if( verbose && header_changed ) { for( auto & it : metadataColumns ) { std::cout << std::setw( 20 ) << it.first; Loading Loading @@ -163,8 +168,6 @@ public: writeTableRow( const String & spanningElement, const RowElements & subElements ) { using namespace std; if( verbose ) { for( auto & it : metadataColumns ) { std::cout << std::setw( 20 ) << it.second; Loading Loading @@ -278,6 +281,27 @@ protected: }; struct BenchmarkResult { using HeaderElements = Logging::HeaderElements; using RowElements = Logging::RowElements; double bandwidth = std::numeric_limits<double>::quiet_NaN(); double time = std::numeric_limits<double>::quiet_NaN(); double speedup = std::numeric_limits<double>::quiet_NaN(); virtual HeaderElements getTableHeader() const { return HeaderElements({"bandwidth", "time", "speedup"}); } virtual RowElements getRowElements() const { return RowElements({ bandwidth, time, speedup }); } }; class Benchmark : protected Logging { Loading Loading @@ -305,7 +329,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); } // Marks the start of a new benchmark (with custom metadata) Loading @@ -315,7 +338,6 @@ public: { closeTable(); writeTitle( title ); monitor.setStage( title.getString() ); // add loops to metadata metadata["loops"] = String(loops); writeMetadata( metadata ); Loading @@ -342,6 +364,7 @@ public: const double datasetSize = 0.0, // in GB const double baseTime = 0.0 ) { monitor.setStage( operation.getString() ); if( metadataColumns.size() > 0 && String(metadataColumns[ 0 ].first) == "operation" ) { metadataColumns[ 0 ].second = operation; } Loading Loading @@ -393,43 +416,45 @@ public: double time( ResetFunction reset, const String & performer, ComputeFunction & compute ) ComputeFunction & compute, BenchmarkResult & result ) { double time; result.time = std::numeric_limits<double>::quiet_NaN(); try { if( verbose ) { // run the monitor main loop Solvers::SolverMonitorThread monitor_thread( monitor ); time = timeFunction( compute, reset, loops, monitor ); result.time = timeFunction( compute, reset, loops, monitor ); } else { time = timeFunction( compute, reset, loops, monitor ); result.time = timeFunction( compute, reset, loops, monitor ); } } catch ( const std::exception& e ) { std::cerr << "timeFunction failed due to a C++ exception with description: " << e.what() << std::endl; } const double bandwidth = datasetSize / time; const double speedup = this->baseTime / time; result.bandwidth = datasetSize / result.time; result.speedup = this->baseTime / result.time; if( this->baseTime == 0.0 ) this->baseTime = time; this->baseTime = result.time; writeTableHeader( performer, HeaderElements({"bandwidth", "time", "speedup"}) ); writeTableRow( performer, RowElements({ bandwidth, time, speedup }) ); writeTableHeader( performer, result.getTableHeader() ); writeTableRow( performer, result.getRowElements() ); return this->baseTime; } // Recursive template function to deal with multiple computations with the // same reset function. template< typename ResetFunction, typename ComputeFunction, typename... NextComputations > inline double time( ResetFunction reset, const String & performer, ComputeFunction & compute, NextComputations & ... nextComputations ) ComputeFunction & compute ) { time( reset, performer, compute ); time( reset, nextComputations... ); return this->baseTime; BenchmarkResult result; return time( reset, performer, compute, result ); } // Adds an error message to the log. Should be called in places where the Loading @@ -445,6 +470,12 @@ public: using Logging::save; Solvers::IterativeSolverMonitor< double, int >& getMonitor() { return monitor; } protected: int loops; double datasetSize = 0.0; Loading @@ -452,5 +483,49 @@ protected: Solvers::IterativeSolverMonitor< double, int > monitor; }; Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = String( cacheSizes.L1data ) + ", " + String( cacheSizes.L1instruction ) + ", " + String( cacheSizes.L2 ) + ", " + String( cacheSizes.L3 ); #ifdef HAVE_CUDA const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); const String deviceArch = String( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + String( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { { "host name", Devices::SystemInfo::getHostname() }, { "architecture", Devices::SystemInfo::getArchitecture() }, { "system", Devices::SystemInfo::getSystemName() }, { "system release", Devices::SystemInfo::getSystemRelease() }, { "start time", Devices::SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) }, #endif { "OpenMP enabled", Devices::Host::isOMPEnabled() }, { "OpenMP threads", Devices::Host::getMaxThreadsCount() }, { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, { "CPU cores", Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU threads per core", Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) }, { "CPU max frequency (MHz)", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, { "GPU CUDA cores", Devices::CudaDeviceInfo::getCudaCores( activeGPU ) }, { "GPU clock rate (MHz)", (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 }, { "GPU global memory (GB)", (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 }, { "GPU memory clock rate (MHz)", (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 }, { "GPU memory ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) }, #endif }; return metadata; } } // namespace Benchmarks } // namespace TNL
src/Benchmarks/CMakeLists.txt +1 −0 Original line number Diff line number Diff line add_subdirectory( HeatEquation ) add_subdirectory( BLAS ) add_subdirectory( SpMV ) add_subdirectory( DistSpMV ) add_subdirectory( LinearSolvers ) set( headers Loading
src/Benchmarks/DistSpMV/CMakeLists.txt 0 → 100644 +11 −0 Original line number Diff line number Diff line if( BUILD_CUDA ) cuda_add_executable( tnl-benchmark-distributed-spmv-cuda tnl-benchmark-distributed-spmv.cu ) target_link_libraries( tnl-benchmark-distributed-spmv-cuda tnl ) install( TARGETS tnl-benchmark-distributed-spmv-cuda RUNTIME DESTINATION bin ) endif() add_executable( tnl-benchmark-distributed-spmv tnl-benchmark-distributed-spmv.cpp ) target_link_libraries( tnl-benchmark-distributed-spmv tnl ) install( TARGETS tnl-benchmark-distributed-spmv RUNTIME DESTINATION bin )