From 4bfcb78130076f46c67788e6986a2dfd54996b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz> Date: Wed, 9 Dec 2015 23:27:46 +0100 Subject: [PATCH] Fixed order of fields printed to stdout in the CUDA benchmark --- tests/benchmarks/tnl-cuda-benchmarks.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/benchmarks/tnl-cuda-benchmarks.h b/tests/benchmarks/tnl-cuda-benchmarks.h index 09993df3fb..f45df99b3c 100644 --- a/tests/benchmarks/tnl-cuda-benchmarks.h +++ b/tests/benchmarks/tnl-cuda-benchmarks.h @@ -139,24 +139,29 @@ int main( int argc, char* argv[] ) cout << bandwidth << " GB/sec." << endl; */ - Real resultHost, resultDevice; + Real resultHost, resultDevice, timeHost, timeDevice; + cout << "Benchmarking scalar product on CPU: "; timer.reset(); timer.start(); for( int i = 0; i < loops; i++ ) resultHost = hostVector.scalarProduct( hostVector2 ); timer.stop(); + timeHost = timer.getTime(); bandwidth = 2 * datasetSize / timer.getTime(); - cout << bandwidth << " GB/sec." << endl; + cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl; - cout << "Benchmarking scalar product on GPU: " << endl; + cout << "Benchmarking scalar product on GPU: "; timer.reset(); timer.start(); for( int i = 0; i < loops; i++ ) resultDevice = deviceVector.scalarProduct( deviceVector2 ); timer.stop(); + timeDevice = timer.getTime(); bandwidth = 2 * datasetSize / timer.getTime(); - cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl; + cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl; + cout << "CPU/GPU speedup: " << timeHost / timeDevice << endl; + if( resultHost != resultDevice ) { cerr << "Error. " << resultHost << " != " << resultDevice << endl; @@ -210,19 +215,20 @@ int main( int argc, char* argv[] ) timer.reset(); timer.start(); hostVector.computePrefixSum(); - cout << "Time: " << timer.getTime() << endl; timer.stop(); + timeHost = timer.getTime(); bandwidth = 2 * datasetSize / loops / timer.getTime(); - cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl; + cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl; - cout << "Benchmarking prefix-sum on GPU ..." << endl; + cout << "Benchmarking prefix-sum on GPU: "; timer.reset(); timer.start(); deviceVector.computePrefixSum(); - cout << "Time: " << timer.getTime() << endl; timer.stop(); + timeDevice = timer.getTime(); bandwidth = 2 * datasetSize / loops / timer.getTime(); - cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl; + cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl; + cout << "CPU/GPU speedup: " << timeHost / timeDevice << endl; for( int i = 0; i < size; i++ ) if( hostVector.getElement( i ) != deviceVector.getElement( i ) ) -- GitLab