Loading src/Benchmarks/BLAS/array-operations.h +32 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,8 @@ #pragma once #include <cstring> #include "../Benchmarks.h" #include <TNL/Containers/Array.h> Loading Loading @@ -66,6 +68,36 @@ benchmarkArrayOperations( Benchmark & benchmark, reset12(); if( std::is_fundamental< Real >::value ) { // std::memcmp auto compareHost = [&]() { if( std::memcmp( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ) == 0 ) resultHost = true; else resultHost = false; }; benchmark.setOperation( "comparison (memcmp)", 2 * datasetSize ); benchmark.time< Devices::Host >( reset12, "CPU", compareHost ); // std::memcpy and cudaMemcpy auto copyHost = [&]() { std::memcpy( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ); }; benchmark.setOperation( "copy (memcpy)", 2 * datasetSize ); benchmark.time< Devices::Host >( reset12, "CPU", copyHost ); #ifdef HAVE_CUDA auto copyCuda = [&]() { cudaMemcpy( deviceArray.getData(), deviceArray2.getData(), deviceArray.getSize() * sizeof(Real), cudaMemcpyDeviceToDevice ); TNL_CHECK_CUDA_DEVICE; }; benchmark.time< Devices::Cuda >( reset12, "GPU", copyCuda ); #endif } auto compareHost = [&]() { resultHost = (int) ( hostArray == hostArray2 ); }; Loading src/Benchmarks/BLAS/tnl-benchmark-blas.h +1 −1 Original line number Diff line number Diff line Loading @@ -182,7 +182,7 @@ main( int argc, char* argv[] ) runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow ); if( ! benchmark.save( logFile ) ) { std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl; return EXIT_FAILURE; } Loading Loading
src/Benchmarks/BLAS/array-operations.h +32 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,8 @@ #pragma once #include <cstring> #include "../Benchmarks.h" #include <TNL/Containers/Array.h> Loading Loading @@ -66,6 +68,36 @@ benchmarkArrayOperations( Benchmark & benchmark, reset12(); if( std::is_fundamental< Real >::value ) { // std::memcmp auto compareHost = [&]() { if( std::memcmp( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ) == 0 ) resultHost = true; else resultHost = false; }; benchmark.setOperation( "comparison (memcmp)", 2 * datasetSize ); benchmark.time< Devices::Host >( reset12, "CPU", compareHost ); // std::memcpy and cudaMemcpy auto copyHost = [&]() { std::memcpy( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ); }; benchmark.setOperation( "copy (memcpy)", 2 * datasetSize ); benchmark.time< Devices::Host >( reset12, "CPU", copyHost ); #ifdef HAVE_CUDA auto copyCuda = [&]() { cudaMemcpy( deviceArray.getData(), deviceArray2.getData(), deviceArray.getSize() * sizeof(Real), cudaMemcpyDeviceToDevice ); TNL_CHECK_CUDA_DEVICE; }; benchmark.time< Devices::Cuda >( reset12, "GPU", copyCuda ); #endif } auto compareHost = [&]() { resultHost = (int) ( hostArray == hostArray2 ); }; Loading
src/Benchmarks/BLAS/tnl-benchmark-blas.h +1 −1 Original line number Diff line number Diff line Loading @@ -182,7 +182,7 @@ main( int argc, char* argv[] ) runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow ); if( ! benchmark.save( logFile ) ) { std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." << std::endl; return EXIT_FAILURE; } Loading