Commit 743414e0 authored by Jakub Klinkovský

Added more cuBLAS benchmarks

parent bae182b2
@@ -5,6 +5,70 @@
#include <cublas_v2.h>
inline cublasStatus_t
cublasIgamax( cublasHandle_t handle, int n,
              const float *x, int incx, int *result )
{
   return cublasIsamax( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasIgamax( cublasHandle_t handle, int n,
              const double *x, int incx, int *result )
{
   return cublasIdamax( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasIgamin( cublasHandle_t handle, int n,
              const float *x, int incx, int *result )
{
   return cublasIsamin( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasIgamin( cublasHandle_t handle, int n,
              const double *x, int incx, int *result )
{
   return cublasIdamin( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGasum( cublasHandle_t handle, int n,
             const float *x, int incx, float *result )
{
   return cublasSasum( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGasum( cublasHandle_t handle, int n,
             const double *x, int incx, double *result )
{
   return cublasDasum( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGaxpy( cublasHandle_t handle, int n,
             const float *alpha,
             const float *x, int incx,
             float *y, int incy )
{
   return cublasSaxpy( handle, n, alpha, x, incx, y, incy );
}

inline cublasStatus_t
cublasGaxpy( cublasHandle_t handle, int n,
             const double *alpha,
             const double *x, int incx,
             double *y, int incy )
{
   return cublasDaxpy( handle, n, alpha, x, incx, y, incy );
}
inline cublasStatus_t
cublasGdot( cublasHandle_t handle, int n,
            const float *x, int incx,
@@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n,
   return cublasDdot( handle, n, x, incx, y, incy, result );
}
inline cublasStatus_t
cublasGnrm2( cublasHandle_t handle, int n,
             const float *x, int incx, float *result )
{
   return cublasSnrm2( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGnrm2( cublasHandle_t handle, int n,
             const double *x, int incx, double *result )
{
   return cublasDnrm2( handle, n, x, incx, result );
}

inline cublasStatus_t
cublasGscal( cublasHandle_t handle, int n,
             const float *alpha,
             float *x, int incx )
{
   return cublasSscal( handle, n, alpha, x, incx );
}

inline cublasStatus_t
cublasGscal( cublasHandle_t handle, int n,
             const double *alpha,
             double *x, int incx )
{
   return cublasDscal( handle, n, alpha, x, incx );
}
#endif
#endif
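The overloaded cublasG*/cublasIg* wrappers above give each routine a single type-generic name, so templated benchmark code compiles unchanged for both float and double: overload resolution picks the S- or D-variant at compile time. A minimal sketch of the idea, assuming a valid cublasHandle_t and device pointers (the deviceDot helper below is illustrative, not part of the commit):

#ifdef HAVE_CUBLAS
// Illustrative sketch, not part of the commit: overload resolution picks
// cublasSdot for float and cublasDdot for double.
template< typename Real >
Real deviceDot( cublasHandle_t handle, int n,
                const Real* x, const Real* y )
{
   Real result = 0;
   // with the default CUBLAS_POINTER_MODE_HOST, cuBLAS writes the result
   // directly into this host variable
   cublasGdot( handle, n, x, 1, y, 1, &result );
   return result;
}
#endif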
@@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto multiplyCuda = [&]() {
      deviceVector *= 0.5;
   };
#ifdef HAVE_CUBLAS
   auto multiplyCublas = [&]() {
      const Real alpha = 0.5;
      cublasGscal( cublasHandle, size,
                   &alpha,
                   deviceVector.getData(), 1 );
   };
#endif
benchmark.setOperation( "scalar multiplication", 2 * datasetSize ); benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", multiplyHost, "CPU", multiplyHost,
"GPU", multiplyCuda ); "GPU", multiplyCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", multiplyCublas
#endif
);
   auto addVectorHost = [&]() {
@@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto addVectorCuda = [&]() {
      deviceVector.addVector( deviceVector2 );
   };
#ifdef HAVE_CUBLAS
   auto addVectorCublas = [&]() {
      const Real alpha = 1.0;
      cublasGaxpy( cublasHandle, size,
                   &alpha,
                   deviceVector2.getData(), 1,
                   deviceVector.getData(), 1 );
   };
#endif
benchmark.setOperation( "vector addition", 3 * datasetSize ); benchmark.setOperation( "vector addition", 3 * datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", addVectorHost, "CPU", addVectorHost,
"GPU", addVectorCuda ); "GPU", addVectorCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", addVectorCublas
#endif
);
   auto maxHost = [&]() {
@@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto absMaxCuda = [&]() {
      resultDevice = deviceVector.absMax();
   };
#ifdef HAVE_CUBLAS
   auto absMaxCublas = [&]() {
      int index = 0;
      cublasIgamax( cublasHandle, size,
                    deviceVector.getData(), 1,
                    &index );
      // cuBLAS returns a 1-based (Fortran-style) index
      resultDevice = deviceVector.getElement( index - 1 );
   };
#endif
benchmark.setOperation( "absMax", datasetSize ); benchmark.setOperation( "absMax", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", absMaxHost, "CPU", absMaxHost,
"GPU", absMaxCuda ); "GPU", absMaxCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", absMaxCublas
#endif
);
   auto absMinHost = [&]() {
@@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto absMinCuda = [&]() {
      resultDevice = deviceVector.absMin();
   };
#ifdef HAVE_CUBLAS
   auto absMinCublas = [&]() {
      int index = 0;
      cublasIgamin( cublasHandle, size,
                    deviceVector.getData(), 1,
                    &index );
      // cuBLAS returns a 1-based (Fortran-style) index
      resultDevice = deviceVector.getElement( index - 1 );
   };
#endif
benchmark.setOperation( "absMin", datasetSize ); benchmark.setOperation( "absMin", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", absMinHost, "CPU", absMinHost,
"GPU", absMinCuda ); "GPU", absMinCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", absMinCublas
#endif
);
   auto sumHost = [&]() {
@@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto l1normCuda = [&]() {
      resultDevice = deviceVector.lpNorm( 1.0 );
   };
#ifdef HAVE_CUBLAS
   auto l1normCublas = [&]() {
      cublasGasum( cublasHandle, size,
                   deviceVector.getData(), 1,
                   &resultDevice );
   };
#endif
benchmark.setOperation( "l1 norm", datasetSize ); benchmark.setOperation( "l1 norm", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", l1normHost, "CPU", l1normHost,
"GPU", l1normCuda ); "GPU", l1normCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", l1normCublas
#endif
);
   auto l2normHost = [&]() {
@@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
   auto l2normCuda = [&]() {
      resultDevice = deviceVector.lpNorm( 2.0 );
   };
#ifdef HAVE_CUBLAS
   auto l2normCublas = [&]() {
      cublasGnrm2( cublasHandle, size,
                   deviceVector.getData(), 1,
                   &resultDevice );
   };
#endif
benchmark.setOperation( "l2 norm", datasetSize ); benchmark.setOperation( "l2 norm", datasetSize );
benchmark.time( reset1, benchmark.time( reset1,
"CPU", l2normHost, "CPU", l2normHost,
"GPU", l2normCuda ); "GPU", l2normCuda
#ifdef HAVE_CUBLAS
, "cuBLAS", l2normCublas
#endif
);
   auto l3normHost = [&]() {
...
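Two details of the pattern above are worth noting. The "cuBLAS" contender is appended to the variadic benchmark.time() call only when HAVE_CUBLAS is defined, so builds without cuBLAS still compile. And the asum/nrm2 lambdas pass the address of a host variable as the result, which relies on the handle using the default CUBLAS_POINTER_MODE_HOST. A hedged sketch of the handle setup this implies (the commit itself does not show where cublasHandle is created):

#ifdef HAVE_CUBLAS
// Illustrative sketch, not shown in the commit: create the handle and
// make the default pointer mode explicit, so that the result arguments
// of cublasGasum/cublasGnrm2 are treated as host pointers.
cublasHandle_t cublasHandle;
cublasCreate( &cublasHandle );
cublasSetPointerMode( cublasHandle, CUBLAS_POINTER_MODE_HOST );
#endif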