Commit c4d018b8 authored by Jakub Klinkovský
Browse files

Fixed BLAS benchmark

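The calculation of bandwidth was broken by commit fb97bcd0. The dataset size
is now computed for a single iteration (without the `loops` factor), and the
prefix-sum bandwidth is no longer divided by `loops`.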

The check of MpiCommunicator::IsInitialized() is necessary because
non-distributed benchmarks such as tnl-benchmark-blas do not initialize
the communicator.
parent 679d1979
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ benchmarkArrayOperations( Benchmark & benchmark,
   typedef Containers::Array< Real, Devices::Cuda, Index > CudaArray;
   using namespace std;

   double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
   double datasetSize = (double) size * sizeof( Real ) / oneGB;

   HostArray hostArray, hostArray2;
   CudaArray deviceArray, deviceArray2;
+1 −1
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ benchmarkSpMV( Benchmark & benchmark,

   const int elements = setHostTestMatrix< HostMatrix >( hostMatrix, elementsPerRow );
   setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow );
   const double datasetSize = ( double ) loops * elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
   const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;

   // reset function
   auto reset = [&]() {
+3 −3
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ benchmarkVectorOperations( Benchmark & benchmark,
   typedef Containers::Vector< Real, Devices::Cuda, Index > CudaVector;
   using namespace std;

   double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
   double datasetSize = (double) size * sizeof( Real ) / oneGB;

   HostVector hostVector, hostVector2;
   CudaVector deviceVector, deviceVector2;
@@ -252,7 +252,7 @@ benchmarkVectorOperations( Benchmark & benchmark,
   hostVector.computePrefixSum();
   timer.stop();
   timeHost = timer.getTime();
   bandwidth = 2 * datasetSize / loops / timer.getTime();
   bandwidth = 2 * datasetSize / timer.getTime();
   std::cout << "  CPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;

   timer.reset();
@@ -260,7 +260,7 @@ benchmarkVectorOperations( Benchmark & benchmark,
   deviceVector.computePrefixSum();
   timer.stop();
   timeDevice = timer.getTime();
   bandwidth = 2 * datasetSize / loops / timer.getTime();
   bandwidth = 2 * datasetSize / timer.getTime();
   std::cout << "  GPU: bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << std::endl;
   std::cout << "  CPU/GPU speedup: " << timeHost / timeDevice << std::endl;

+3 −1
Original line number Diff line number Diff line
@@ -504,7 +504,9 @@ Benchmark::MetadataMap getHardwareMetadata()
       { "system release", Devices::SystemInfo::getSystemRelease() },
       { "start time", Devices::SystemInfo::getCurrentTime() },
#ifdef HAVE_MPI
       { "number of MPI processes", Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) },
       { "number of MPI processes", (Communicators::MpiCommunicator::IsInitialized())
                                       ? Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup )
                                       : 1 },
#endif
       { "OpenMP enabled", Devices::Host::isOMPEnabled() },
       { "OpenMP threads", Devices::Host::getMaxThreadsCount() },