Commit 743414e0 authored by Jakub Klinkovský

Added more cuBLAS benchmarks

parent bae182b2
@@ -5,6 +5,70 @@
#include <cublas_v2.h>
inline cublasStatus_t
cublasIgamax( cublasHandle_t handle, int n,
              const float *x, int incx, int *result )
{
   return cublasIsamax( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasIgamax( cublasHandle_t handle, int n,
              const double *x, int incx, int *result )
{
   return cublasIdamax( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasIgamin( cublasHandle_t handle, int n,
              const float *x, int incx, int *result )
{
   return cublasIsamin( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasIgamin( cublasHandle_t handle, int n,
              const double *x, int incx, int *result )
{
   return cublasIdamin( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGasum( cublasHandle_t handle, int n,
             const float *x, int incx, float *result )
{
   return cublasSasum( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGasum( cublasHandle_t handle, int n,
             const double *x, int incx, double *result )
{
   return cublasDasum( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGaxpy( cublasHandle_t handle, int n,
             const float *alpha,
             const float *x, int incx,
             float *y, int incy )
{
   return cublasSaxpy( handle, n, alpha, x, incx, y, incy );
}

inline cublasStatus_t
cublasGaxpy( cublasHandle_t handle, int n,
             const double *alpha,
             const double *x, int incx,
             double *y, int incy )
{
   return cublasDaxpy( handle, n, alpha, x, incx, y, incy );
}
inline cublasStatus_t
cublasGdot( cublasHandle_t handle, int n,
            const float *x, int incx,
@@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n,
   return cublasDdot( handle, n, x, incx, y, incy, result );
}
inline cublasStatus_t
cublasGnrm2( cublasHandle_t handle, int n,
             const float *x, int incx, float *result )
{
   return cublasSnrm2( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGnrm2( cublasHandle_t handle, int n,
             const double *x, int incx, double *result )
{
   return cublasDnrm2( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGscal( cublasHandle_t handle, int n,
             const float *alpha,
             float *x, int incx )
{
   return cublasSscal( handle, n, alpha, x, incx );
}

inline cublasStatus_t
cublasGscal( cublasHandle_t handle, int n,
             const double *alpha,
             double *x, int incx )
{
   return cublasDscal( handle, n, alpha, x, incx );
}
#endif
#endif
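The overloaded cublasG*/cublasIg* wrappers above give each routine a single type-generic name, so templated benchmark code compiles unchanged for both float and double: overload resolution picks the S- or D-variant at compile time. A minimal sketch of the idea, assuming a valid cublasHandle_t and device pointers (the deviceDot helper below is illustrative, not part of the commit):

#ifdef HAVE_CUBLAS
// Illustrative sketch, not part of the commit: overload resolution picks
// cublasSdot for float and cublasDdot for double.
template< typename Real >
Real deviceDot( cublasHandle_t handle, int n,
                const Real* x, const Real* y )
{
   Real result = 0;
   // with the default CUBLAS_POINTER_MODE_HOST, cuBLAS writes the result
   // directly into this host variable
   cublasGdot( handle, n, x, 1, y, 1, &result );
   return result;
}
#endif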
@@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto multiplyCuda = [&]() {
      deviceVector *= 0.5;
   };
#ifdef HAVE_CUBLAS
   auto multiplyCublas = [&]() {
      const Real alpha = 0.5;
      cublasGscal( cublasHandle, size,
                   &alpha,
                   deviceVector.getData(), 1 );
   };
#endif
benchmark.setOperation( "scalar multiplication", 2 * datasetSize ); benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", multiplyHost, "CPU", multiplyHost,
"GPU", multiplyCuda ); "GPU", multiplyCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", multiplyCublas
#endif
);
   auto addVectorHost = [&]() {
@@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto addVectorCuda = [&]() {
      deviceVector.addVector( deviceVector2 );
   };
#ifdef HAVE_CUBLAS
   auto addVectorCublas = [&]() {
      const Real alpha = 1.0;
      cublasGaxpy( cublasHandle, size,
                   &alpha,
                   deviceVector2.getData(), 1,
                   deviceVector.getData(), 1 );
   };
#endif
benchmark.setOperation( "vector addition", 3 * datasetSize ); benchmark.setOperation( "vector addition", 3 * datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", addVectorHost, "CPU", addVectorHost,
"GPU", addVectorCuda ); "GPU", addVectorCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", addVectorCublas
#endif
);
   auto maxHost = [&]() {
@@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto absMaxCuda = [&]() {
      resultDevice = deviceVector.absMax();
   };
#ifdef HAVE_CUBLAS
   auto absMaxCublas = [&]() {
      int index = 0;
      cublasIgamax( cublasHandle, size,
                    deviceVector.getData(), 1,
                    &index );
      // cuBLAS returns a 1-based (Fortran-style) index
      resultDevice = deviceVector.getElement( index - 1 );
   };
#endif
benchmark.setOperation( "absMax", datasetSize ); benchmark.setOperation( "absMax", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", absMaxHost, "CPU", absMaxHost,
"GPU", absMaxCuda ); "GPU", absMaxCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", absMaxCublas
#endif
);
   auto absMinHost = [&]() {
@@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto absMinCuda = [&]() {
      resultDevice = deviceVector.absMin();
   };
#ifdef HAVE_CUBLAS
   auto absMinCublas = [&]() {
      int index = 0;
      cublasIgamin( cublasHandle, size,
                    deviceVector.getData(), 1,
                    &index );
      // cuBLAS returns a 1-based (Fortran-style) index
      resultDevice = deviceVector.getElement( index - 1 );
   };
#endif
benchmark.setOperation( "absMin", datasetSize ); benchmark.setOperation( "absMin", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", absMinHost, "CPU", absMinHost,
"GPU", absMinCuda ); "GPU", absMinCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", absMinCublas
#endif
);
   auto sumHost = [&]() {
@@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto l1normCuda = [&]() {
      resultDevice = deviceVector.lpNorm( 1.0 );
   };
#ifdef HAVE_CUBLAS
   auto l1normCublas = [&]() {
      cublasGasum( cublasHandle, size,
                   deviceVector.getData(), 1,
                   &resultDevice );
   };
#endif
benchmark.setOperation( "l1 norm", datasetSize ); benchmark.setOperation( "l1 norm", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", l1normHost, "CPU", l1normHost,
"GPU", l1normCuda ); "GPU", l1normCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", l1normCublas
#endif
);
   auto l2normHost = [&]() {
@@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto l2normCuda = [&]() {
      resultDevice = deviceVector.lpNorm( 2.0 );
   };
#ifdef HAVE_CUBLAS
   auto l2normCublas = [&]() {
      cublasGnrm2( cublasHandle, size,
                   deviceVector.getData(), 1,
                   &resultDevice );
   };
#endif
benchmark.setOperation( "l2 norm", datasetSize ); benchmark.setOperation( "l2 norm", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", l2normHost, "CPU", l2normHost,
"GPU", l2normCuda ); "GPU", l2normCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", l2normCublas
#endif
);
   auto l3normHost = [&]() {
...
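Two details of the pattern above are worth noting. The "cuBLAS" contender is appended to the variadic benchmark.time() call only when HAVE_CUBLAS is defined, so builds without cuBLAS still compile. And the asum/nrm2 lambdas pass the address of a host variable as the result, which relies on the handle using the default CUBLAS_POINTER_MODE_HOST. A hedged sketch of the handle setup this implies (the commit itself does not show where cublasHandle is created):

#ifdef HAVE_CUBLAS
// Illustrative sketch, not shown in the commit: create the handle and
// make the default pointer mode explicit, so that the result arguments
// of cublasGasum/cublasGnrm2 are treated as host pointers.
cublasHandle_t cublasHandle;
cublasCreate( &cublasHandle );
cublasSetPointerMode( cublasHandle, CUBLAS_POINTER_MODE_HOST );
#endif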