diff --git a/tests/benchmarks/cublasWrappers.h b/tests/benchmarks/cublasWrappers.h
index 6369c0f12f34f9deb6ca2094f4255f9ab9142ed9..a7520a34d5d13ced796d8b2366b0382887f09a49 100644
--- a/tests/benchmarks/cublasWrappers.h
+++ b/tests/benchmarks/cublasWrappers.h
@@ -5,6 +5,70 @@
 
 #include <cublas_v2.h>
 
+inline cublasStatus_t
+cublasIgamax( cublasHandle_t handle, int n,
+              const float *x, int incx, int *result )
+{
+    return cublasIsamax( handle, n, x, incx, result );  // NOTE: *result is a 1-based index
+}
+
+inline cublasStatus_t
+cublasIgamax( cublasHandle_t handle, int n,
+              const double *x, int incx, int *result )
+{
+    return cublasIdamax( handle, n, x, incx, result );  // NOTE: *result is a 1-based index
+}
+
+// cublasIgamin: float/double overloads forwarding to cublasI{s,d}amin (1-based index result).
+inline cublasStatus_t
+cublasIgamin( cublasHandle_t handle, int n,
+              const float *x, int incx, int *result )
+{
+    return cublasIsamin( handle, n, x, incx, result );
+}
+
+inline cublasStatus_t
+cublasIgamin( cublasHandle_t handle, int n,
+              const double *x, int incx, int *result )
+{
+    return cublasIdamin( handle, n, x, incx, result );
+}
+
+// cublasGasum: float/double overloads forwarding to cublas{S,D}asum.
+inline cublasStatus_t
+cublasGasum( cublasHandle_t handle, int n,
+             const float *x, int incx, float *result )
+{
+    return cublasSasum( handle, n, x, incx, result );
+}
+
+inline cublasStatus_t
+cublasGasum( cublasHandle_t handle, int n,
+             const double *x, int incx, double *result )
+{
+    return cublasDasum( handle, n, x, incx, result );
+}
+
+// cublasGaxpy: float/double overloads forwarding to cublas{S,D}axpy.
+inline cublasStatus_t
+cublasGaxpy( cublasHandle_t handle, int n,
+             const float *alpha,
+             const float *x, int incx,
+             float *y, int incy )
+{
+    return cublasSaxpy( handle, n, alpha, x, incx, y, incy );
+}
+
+inline cublasStatus_t
+cublasGaxpy( cublasHandle_t handle, int n,
+             const double *alpha,
+             const double *x, int incx,
+             double *y, int incy )
+{
+    return cublasDaxpy( handle, n, alpha, x, incx, y, incy );
+}
+
+// cublasGdot: float/double overloads forwarding to cublas{S,D}dot.
 inline cublasStatus_t
 cublasGdot( cublasHandle_t handle, int n,
             const float *x, int incx,
@@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n,
     return cublasDdot( handle, n, x, incx, y, incy, result );
 }
 
+// cublasGnrm2: float/double overloads forwarding to cublas{S,D}nrm2.
+inline cublasStatus_t
+cublasGnrm2( cublasHandle_t handle, int n,
+             const float *x, int incx, float *result )
+{
+    return cublasSnrm2( handle, n, x, incx, result );
+}
+
+inline cublasStatus_t
+cublasGnrm2( cublasHandle_t handle, int n,
+             const double *x, int incx, double *result )
+{
+    return cublasDnrm2( handle, n, x, incx, result );
+}
+
+// cublasGscal: float/double overloads forwarding to cublas{S,D}scal.
+inline cublasStatus_t
+cublasGscal( cublasHandle_t handle, int n,
+             const float *alpha,
+             float *x, int incx )
+{
+    return cublasSscal( handle, n, alpha, x, incx );
+}
+
+inline cublasStatus_t
+cublasGscal( cublasHandle_t handle, int n,
+             const double *alpha,
+             double *x, int incx )
+{
+    return cublasDscal( handle, n, alpha, x, incx );
+}
+
 #endif
 #endif
diff --git a/tests/benchmarks/vector-operations.h b/tests/benchmarks/vector-operations.h
index 3876235c09163b7b79171d0ed8e465006de27e33..5d15af0ff13b2fb4d64f69e58b8fc6135cd7c9c6 100644
--- a/tests/benchmarks/vector-operations.h
+++ b/tests/benchmarks/vector-operations.h
@@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark,
     auto multiplyCuda = [&]() {
         deviceVector *= 0.5;
     };
+#ifdef HAVE_CUBLAS
+    auto multiplyCublas = [&]() {
+        const Real alpha = 0.5;  // same factor as multiplyCuda
+        cublasGscal( cublasHandle, size,
+                     &alpha,
+                     deviceVector.getData(), 1 );
+    };
+#endif
     benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
     benchmark.time( reset1,
                     "CPU", multiplyHost,
-                    "GPU", multiplyCuda );
+                    "GPU", multiplyCuda
+#ifdef HAVE_CUBLAS
+                  , "cuBLAS", multiplyCublas
+#endif
+                  );
 
 
     auto addVectorHost = [&]() {
@@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
     auto addVectorCuda = [&]() {
         deviceVector.addVector( deviceVector2 );
     };
+#ifdef HAVE_CUBLAS
+    auto addVectorCublas = [&]() {
+        const Real alpha = 1.0;  // axpy with alpha = 1: deviceVector += deviceVector2
+        cublasGaxpy( cublasHandle, size,
+                     &alpha,
+                     deviceVector2.getData(), 1,
+                     deviceVector.getData(), 1 );
+    };
+#endif
     benchmark.setOperation( "vector addition", 3 * datasetSize );
     benchmark.time( reset1,
                     "CPU", addVectorHost,
-                    "GPU", addVectorCuda );
+                    "GPU", addVectorCuda
+#ifdef HAVE_CUBLAS
+                  , "cuBLAS", addVectorCublas
+#endif
+                  );
 
 
     auto maxHost = [&]() {
@@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
     auto absMaxCuda = [&]() {
         resultDevice = deviceVector.absMax();
     };
+#ifdef HAVE_CUBLAS
+    auto absMaxCublas = [&]() {
+        int index = 0;  // NOTE(review): the selected element may be negative, unlike absMax() presumably - confirm
+        cublasIgamax( cublasHandle, size,
+                      deviceVector.getData(), 1,
+                      &index );
+        resultDevice = deviceVector.getElement( index - 1 );  // cuBLAS index is 1-based
+    };
+#endif
     benchmark.setOperation( "absMax", datasetSize );
     benchmark.time( reset1,
                     "CPU", absMaxHost,
-                    "GPU", absMaxCuda );
+                    "GPU", absMaxCuda
+#ifdef HAVE_CUBLAS
+                  , "cuBLAS", absMaxCublas
+#endif
+                  );
 
 
     auto absMinHost = [&]() {
@@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
     auto absMinCuda = [&]() {
         resultDevice = deviceVector.absMin();
     };
+#ifdef HAVE_CUBLAS
+    auto absMinCublas = [&]() {
+        int index = 0;  // NOTE(review): the selected element may be negative, unlike absMin() presumably - confirm
+        cublasIgamin( cublasHandle, size,
+                      deviceVector.getData(), 1,
+                      &index );
+        resultDevice = deviceVector.getElement( index - 1 );  // cuBLAS index is 1-based
+    };
+#endif
     benchmark.setOperation( "absMin", datasetSize );
     benchmark.time( reset1,
                     "CPU", absMinHost,
-                    "GPU", absMinCuda );
+                    "GPU", absMinCuda
+#ifdef HAVE_CUBLAS
+                  , "cuBLAS", absMinCublas
+#endif
+                  );
 
 
     auto sumHost = [&]() {
@@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
     auto l1normCuda = [&]() {
         resultDevice = deviceVector.lpNorm( 1.0 );
     };
+#ifdef HAVE_CUBLAS
+    auto l1normCublas = [&]() {
+        cublasGasum( cublasHandle, size,
+                     deviceVector.getData(), 1,
+                     &resultDevice );  // asum writes the result directly into resultDevice
+    };
+#endif
     benchmark.setOperation( "l1 norm", datasetSize );
     benchmark.time( reset1,
                     "CPU", l1normHost,
-                    "GPU", l1normCuda );
+                    "GPU", l1normCuda
+#ifdef HAVE_CUBLAS
+                  , "cuBLAS", l1normCublas
+#endif
+                  );
 
 
     auto l2normHost = [&]() {
@@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
     auto l2normCuda = [&]() {
         resultDevice = deviceVector.lpNorm( 2.0 );
     };
+#ifdef HAVE_CUBLAS
+    auto l2normCublas = [&]() {
+        cublasGnrm2( cublasHandle, size,
+                     deviceVector.getData(), 1,
+                     &resultDevice );  // nrm2 writes the result directly into resultDevice
+    };
+#endif
     benchmark.setOperation( "l2 norm", datasetSize );
     benchmark.time( reset1,
                     "CPU", l2normHost,
-                    "GPU", l2normCuda );
+                    "GPU", l2normCuda
+#ifdef HAVE_CUBLAS
+                  , "cuBLAS", l2normCublas
+#endif
+                  );
 
 
     auto l3normHost = [&]() {