Commit 4bfcb781 authored by Jakub Klinkovský
Browse files

Fixed order of fields printed to stdout in the CUDA benchmark

parent 735da4f5
Loading
Loading
Loading
Loading
+15 −9
Original line number Diff line number Diff line
@@ -139,24 +139,29 @@ int main( int argc, char* argv[] )
    cout << bandwidth << " GB/sec." << endl;
    */

   Real resultHost, resultDevice;
   Real resultHost, resultDevice, timeHost, timeDevice;

   cout << "Benchmarking scalar product on CPU: ";
   timer.reset();
   timer.start();
   for( int i = 0; i < loops; i++ )
     resultHost = hostVector.scalarProduct( hostVector2 );
   timer.stop();
   timeHost = timer.getTime();
   bandwidth = 2 * datasetSize / timer.getTime();
   cout << bandwidth << " GB/sec." << endl;
   cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
    
   cout << "Benchmarking scalar product on GPU: " << endl;
   cout << "Benchmarking scalar product on GPU: ";
   timer.reset();
   timer.start();
   for( int i = 0; i < loops; i++ )
      resultDevice = deviceVector.scalarProduct( deviceVector2 );
   timer.stop();
   timeDevice = timer.getTime();
   bandwidth = 2 * datasetSize / timer.getTime();
   cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl;
   cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
   cout << "CPU/GPU speedup: " << timeHost / timeDevice << endl;

   if( resultHost != resultDevice )
   {
      cerr << "Error. " << resultHost << " != " << resultDevice << endl;
@@ -210,19 +215,20 @@ int main( int argc, char* argv[] )
   timer.reset();
   timer.start();
   hostVector.computePrefixSum();
   cout << "Time: " << timer.getTime() << endl;
   timer.stop();
   timeHost = timer.getTime();
   bandwidth = 2 * datasetSize / loops / timer.getTime();
   cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl;
   cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
   
   cout << "Benchmarking prefix-sum on GPU ..." << endl;
   cout << "Benchmarking prefix-sum on GPU: ";
   timer.reset();
   timer.start();
   deviceVector.computePrefixSum();
   cout << "Time: " << timer.getTime() << endl;
   timer.stop();
   timeDevice = timer.getTime();
   bandwidth = 2 * datasetSize / loops / timer.getTime();
   cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl;
   cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
   cout << "CPU/GPU speedup: " << timeHost / timeDevice << endl;

   for( int i = 0; i < size; i++ )
      if( hostVector.getElement( i ) != deviceVector.getElement( i ) )