From fb1474d392db8d6a02679f20661e9e54856a1177 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz>
Date: Thu, 10 Dec 2015 00:47:11 +0100
Subject: [PATCH] Added benchmarks for lpNorm to tnl-cuda-benchmarks

---
 tests/benchmarks/tnl-cuda-benchmarks.h | 30 ++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/benchmarks/tnl-cuda-benchmarks.h b/tests/benchmarks/tnl-cuda-benchmarks.h
index 919b4ebe82..17ae5c04b3 100644
--- a/tests/benchmarks/tnl-cuda-benchmarks.h
+++ b/tests/benchmarks/tnl-cuda-benchmarks.h
@@ -101,6 +101,7 @@ int main( int argc, char* argv[] )
     */
 
    Real resultHost, resultDevice, timeHost, timeDevice;
+
    cout << "Benchmarking scalar product on CPU: ";
    timer.reset();
    timer.start();
@@ -147,6 +148,35 @@ int main( int argc, char* argv[] )
 #endif    
 #endif
    
+   // lpNorm( 2.0 ) reads one vector per call, so traffic is datasetSize (assumed per-vector volume - TODO confirm).
+   cout << "Benchmarking lpNorm on CPU: ";
+   timer.reset();
+   timer.start();
+   for( int i = 0; i < loops; i++ )
+      resultHost = hostVector.lpNorm( 2.0 );
+   timer.stop();
+   timeHost = timer.getTime();
+   bandwidth = datasetSize / timer.getTime();
+   cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
+   // Same measurement on the device vector; bandwidth again counts a single vector per lpNorm call.
+   cout << "Benchmarking lpNorm on GPU: ";
+   timer.reset();
+   timer.start();
+   for( int i = 0; i < loops; i++ )
+      resultDevice = deviceVector.lpNorm( 2.0 );
+   timer.stop();
+   timeDevice = timer.getTime();
+   bandwidth = datasetSize / timer.getTime();
+   cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
+   cout << "CPU/GPU speedup: " << timeHost / timeDevice << endl;
+   // Exact != check, report only (exit disabled). NOTE(review): host/device rounding may differ - tolerance needed?
+   if( resultHost != resultDevice )
+   {
+      cerr << "Error. " << resultHost << " != " << resultDevice << endl;
+      //return EXIT_FAILURE;
+   }
+
+
    cout << "Benchmarking prefix-sum on CPU: ";
    timer.reset();
    timer.start();
-- 
GitLab