Commit 743414e0 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Added more cuBLAS benchmarks

parent bae182b2
Loading
Loading
Loading
Loading
+96 −0
Original line number Diff line number Diff line
@@ -5,6 +5,70 @@

#include <cublas_v2.h>

// Precision-agnostic IAMAX wrapper: overload resolution on the element type
// of x picks the matching cuBLAS routine (index of the max-|x[i]| element;
// see the cuBLAS Level-1 IAMAX documentation for the exact contract).
inline cublasStatus_t
cublasIgamax( cublasHandle_t handle, int n,
              const float *x, int incx, int *result )
{
    const cublasStatus_t status = cublasIsamax( handle, n, x, incx, result );
    return status;
}

// Double-precision overload of the generic IAMAX wrapper.
inline cublasStatus_t
cublasIgamax( cublasHandle_t handle, int n,
              const double *x, int incx, int *result )
{
    const cublasStatus_t status = cublasIdamax( handle, n, x, incx, result );
    return status;
}


// Precision-agnostic IAMIN wrapper: dispatches on the element type of x to
// the single- or double-precision cuBLAS routine (index of the min-|x[i]|
// element; see the cuBLAS Level-1 IAMIN documentation).
inline cublasStatus_t
cublasIgamin( cublasHandle_t handle, int n,
              const float *x, int incx, int *result )
{
    const cublasStatus_t status = cublasIsamin( handle, n, x, incx, result );
    return status;
}

// Double-precision overload of the generic IAMIN wrapper.
inline cublasStatus_t
cublasIgamin( cublasHandle_t handle, int n,
              const double *x, int incx, int *result )
{
    const cublasStatus_t status = cublasIdamin( handle, n, x, incx, result );
    return status;
}


// Precision-agnostic ASUM wrapper: forwards to the cuBLAS routine matching
// the element type of x (sum of absolute values; see the cuBLAS Level-1
// ASUM documentation).
inline cublasStatus_t
cublasGasum( cublasHandle_t handle, int n,
             const float *x, int incx, float *result )
{
    const cublasStatus_t status = cublasSasum( handle, n, x, incx, result );
    return status;
}

// Double-precision overload of the generic ASUM wrapper.
inline cublasStatus_t
cublasGasum( cublasHandle_t handle, int n,
             const double *x, int incx, double *result )
{
    const cublasStatus_t status = cublasDasum( handle, n, x, incx, result );
    return status;
}


// Precision-agnostic AXPY wrapper (BLAS: y := alpha*x + y); the element
// type of the arguments selects the single- or double-precision cuBLAS
// routine via overload resolution.
inline cublasStatus_t
cublasGaxpy( cublasHandle_t handle, int n,
             const float *alpha,
             const float *x, int incx,
             float *y, int incy )
{
    const cublasStatus_t status = cublasSaxpy( handle, n, alpha, x, incx, y, incy );
    return status;
}

// Double-precision overload of the generic AXPY wrapper.
inline cublasStatus_t
cublasGaxpy( cublasHandle_t handle, int n,
             const double *alpha,
             const double *x, int incx,
             double *y, int incy )
{
    const cublasStatus_t status = cublasDaxpy( handle, n, alpha, x, incx, y, incy );
    return status;
}


inline cublasStatus_t
cublasGdot( cublasHandle_t handle, int n,
            const float        *x, int incx,
@@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n,
    return cublasDdot( handle, n, x, incx, y, incy, result );
}


// Precision-agnostic NRM2 wrapper: dispatches on the element type of x to
// the matching cuBLAS routine (Euclidean norm; see the cuBLAS Level-1
// NRM2 documentation).
inline cublasStatus_t
cublasGnrm2( cublasHandle_t handle, int n,
             const float *x, int incx, float *result )
{
    const cublasStatus_t status = cublasSnrm2( handle, n, x, incx, result );
    return status;
}

// Double-precision overload of the generic NRM2 wrapper.
inline cublasStatus_t
cublasGnrm2( cublasHandle_t handle, int n,
             const double *x, int incx, double *result )
{
    const cublasStatus_t status = cublasDnrm2( handle, n, x, incx, result );
    return status;
}


// Precision-agnostic SCAL wrapper (BLAS: x := alpha*x); the element type of
// alpha/x selects the single- or double-precision cuBLAS routine.
inline cublasStatus_t
cublasGscal( cublasHandle_t handle, int n,
             const float *alpha,
             float *x, int incx )
{
    const cublasStatus_t status = cublasSscal( handle, n, alpha, x, incx );
    return status;
}

// Double-precision overload of the generic SCAL wrapper.
inline cublasStatus_t
cublasGscal( cublasHandle_t handle, int n,
             const double *alpha,
             double *x, int incx )
{
    const cublasStatus_t status = cublasDscal( handle, n, alpha, x, incx );
    return status;
}

#endif
#endif
+79 −6
Original line number Diff line number Diff line
@@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark,
    auto multiplyCuda = [&]() {
        deviceVector *= 0.5;
    };
#ifdef HAVE_CUBLAS
    auto multiplyCublas = [&]() {
        const Real alpha = 0.5;
        cublasGscal( cublasHandle, size,
                     &alpha,
                     deviceVector.getData(), 1 );
    };
#endif
    benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
    benchmark.time( reset1,
                    "CPU", multiplyHost,
                    "GPU", multiplyCuda );
                    "GPU", multiplyCuda
#ifdef HAVE_CUBLAS
                  , "cuBLAS", multiplyCublas
#endif
                  );


    auto addVectorHost = [&]() {
@@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
    auto addVectorCuda = [&]() {
        deviceVector.addVector( deviceVector2 );
    };
#ifdef HAVE_CUBLAS
    auto addVectorCublas = [&]() {
        const Real alpha = 1.0;
        cublasGaxpy( cublasHandle, size,
                     &alpha,
                     deviceVector2.getData(), 1,
                     deviceVector.getData(), 1 );
    };
#endif
    benchmark.setOperation( "vector addition", 3 * datasetSize );
    benchmark.time( reset1,
                    "CPU", addVectorHost,
                    "GPU", addVectorCuda );
                    "GPU", addVectorCuda
#ifdef HAVE_CUBLAS
                  , "cuBLAS", addVectorCublas
#endif
                  );


    auto maxHost = [&]() {
@@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
    auto absMaxCuda = [&]() {
        resultDevice = deviceVector.absMax();
    };
#ifdef HAVE_CUBLAS
    auto absMaxCublas = [&]() {
        int index = 0;
        cublasIgamax( cublasHandle, size,
                      deviceVector.getData(), 1,
                      &index );
        resultDevice = deviceVector.getElement( index );
    };
#endif
    benchmark.setOperation( "absMax", datasetSize );
    benchmark.time( reset1,
                    "CPU", absMaxHost,
                    "GPU", absMaxCuda );
                    "GPU", absMaxCuda
#ifdef HAVE_CUBLAS
                  , "cuBLAS", absMaxCublas
#endif
                  );


    auto absMinHost = [&]() {
@@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
    auto absMinCuda = [&]() {
        resultDevice = deviceVector.absMin();
    };
#ifdef HAVE_CUBLAS
    auto absMinCublas = [&]() {
        int index = 0;
        cublasIgamin( cublasHandle, size,
                      deviceVector.getData(), 1,
                      &index );
        resultDevice = deviceVector.getElement( index );
    };
#endif
    benchmark.setOperation( "absMin", datasetSize );
    benchmark.time( reset1,
                    "CPU", absMinHost,
                    "GPU", absMinCuda );
                    "GPU", absMinCuda
#ifdef HAVE_CUBLAS
                  , "cuBLAS", absMinCublas
#endif
                  );


    auto sumHost = [&]() {
@@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
    auto l1normCuda = [&]() {
        resultDevice = deviceVector.lpNorm( 1.0 );
    };
#ifdef HAVE_CUBLAS
    auto l1normCublas = [&]() {
        cublasGasum( cublasHandle, size,
                     deviceVector.getData(), 1,
                     &resultDevice );
    };
#endif
    benchmark.setOperation( "l1 norm", datasetSize );
    benchmark.time( reset1,
                    "CPU", l1normHost,
                    "GPU", l1normCuda );
                    "GPU", l1normCuda
#ifdef HAVE_CUBLAS
                  , "cuBLAS", l1normCublas
#endif
                  );


    auto l2normHost = [&]() {
@@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
    auto l2normCuda = [&]() {
        resultDevice = deviceVector.lpNorm( 2.0 );
    };
#ifdef HAVE_CUBLAS
    auto l2normCublas = [&]() {
        cublasGnrm2( cublasHandle, size,
                     deviceVector.getData(), 1,
                     &resultDevice );
    };
#endif
    benchmark.setOperation( "l2 norm", datasetSize );
    benchmark.time( reset1,
                    "CPU", l2normHost,
                    "GPU", l2normCuda );
                    "GPU", l2normCuda
#ifdef HAVE_CUBLAS
                  , "cuBLAS", l2normCublas
#endif
                  );


    auto l3normHost = [&]() {