// Rendered diff (no +/- markers) of two headers: tests/benchmarks/cublasWrappers.h
// and tests/benchmarks/vector-operations.h. "Loading @@ ... @@" separates hunks;
// code between hunks is not shown, so cublasGdot and the host lambdas appear
// only in part.
//
// cublasWrappers.h: each wrapper is a pair of overloads selected by element
// type. The `float` overload forwards its arguments unchanged to the cuBLAS
// `S` routine, the `double` overload to the `D` routine, and the
// cublasStatus_t of that call is returned:
//   cublasIgamax -> cublasIsamax / cublasIdamax
//   cublasIgamin -> cublasIsamin / cublasIdamin
//   cublasGasum  -> cublasSasum  / cublasDasum
//   cublasGaxpy  -> cublasSaxpy  / cublasDaxpy
//   cublasGnrm2  -> cublasSnrm2  / cublasDnrm2
//   cublasGscal  -> cublasSscal  / cublasDscal
//
// vector-operations.h (inside benchmarkVectorOperations, per the hunk headers):
// under #ifdef HAVE_CUBLAS each hunk defines a lambda that runs the cuBLAS
// wrapper on deviceVector.getData() with stride 1 and passes it to
// benchmark.time as an extra "cuBLAS" entry. The repeated `"GPU", xCuda` text
// after each `benchmark.time(` is presumably the old line followed by its
// replacement.
//
// NOTE(review): the cuBLAS reference describes the index written by
// cublasI<t>amax / cublasI<t>amin as 1-based; absMaxCublas / absMinCublas pass
// it straight to deviceVector.getElement( index ) -- presumably this should be
// index - 1; confirm against getElement's indexing.
// NOTE(review): getElement yields the element itself, not its magnitude, so
// resultDevice may differ in sign from absMax() / absMin() -- confirm.
// NOTE(review): the cublasStatus_t returned by every wrapper call in the
// lambdas is discarded.
Loading tests/benchmarks/cublasWrappers.h +96 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,70 @@ #include <cublas_v2.h> inline cublasStatus_t cublasIgamax( cublasHandle_t handle, int n, const float *x, int incx, int *result ) { return cublasIsamax( handle, n, x, incx, result ); } inline cublasStatus_t cublasIgamax( cublasHandle_t handle, int n, const double *x, int incx, int *result ) { return cublasIdamax( handle, n, x, incx, result ); } inline cublasStatus_t cublasIgamin( cublasHandle_t handle, int n, const float *x, int incx, int *result ) { return cublasIsamin( handle, n, x, incx, result ); } inline cublasStatus_t cublasIgamin( cublasHandle_t handle, int n, const double *x, int incx, int *result ) { return cublasIdamin( handle, n, x, incx, result ); } inline cublasStatus_t cublasGasum( cublasHandle_t handle, int n, const float *x, int incx, float *result ) { return cublasSasum( handle, n, x, incx, result ); } inline cublasStatus_t cublasGasum( cublasHandle_t handle, int n, const double *x, int incx, double *result ) { return cublasDasum( handle, n, x, incx, result ); } inline cublasStatus_t cublasGaxpy( cublasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy ) { return cublasSaxpy( handle, n, alpha, x, incx, y, incy ); } inline cublasStatus_t cublasGaxpy( cublasHandle_t handle, int n, const double *alpha, const double *x, int incx, double *y, int incy ) { return cublasDaxpy( handle, n, alpha, x, incx, y, incy ); } inline cublasStatus_t cublasGdot( cublasHandle_t handle, int n, const float *x, int incx, Loading @@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n, return cublasDdot( handle, n, x, incx, y, incy, result ); } inline cublasStatus_t cublasGnrm2( cublasHandle_t handle, int n, const float *x, int incx, float *result ) { return cublasSnrm2( handle, n, x, incx, result ); } inline cublasStatus_t cublasGnrm2( cublasHandle_t handle, int n, const double *x, int incx, double *result ) { 
return cublasDnrm2( handle, n, x, incx, result ); } inline cublasStatus_t cublasGscal( cublasHandle_t handle, int n, const float *alpha, float *x, int incx ) { return cublasSscal( handle, n, alpha, x, incx ); } inline cublasStatus_t cublasGscal( cublasHandle_t handle, int n, const double *alpha, double *x, int incx ) { return cublasDscal( handle, n, alpha, x, incx ); } #endif #endif tests/benchmarks/vector-operations.h +79 −6 Original line number Diff line number Diff line Loading @@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark, auto multiplyCuda = [&]() { deviceVector *= 0.5; }; #ifdef HAVE_CUBLAS auto multiplyCublas = [&]() { const Real alpha = 0.5; cublasGscal( cublasHandle, size, &alpha, deviceVector.getData(), 1 ); }; #endif benchmark.setOperation( "scalar multiplication", 2 * datasetSize ); benchmark.time( reset1, "CPU", multiplyHost, "GPU", multiplyCuda ); "GPU", multiplyCuda #ifdef HAVE_CUBLAS , "cuBLAS", multiplyCublas #endif ); auto addVectorHost = [&]() { Loading @@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark, auto addVectorCuda = [&]() { deviceVector.addVector( deviceVector2 ); }; #ifdef HAVE_CUBLAS auto addVectorCublas = [&]() { const Real alpha = 1.0; cublasGaxpy( cublasHandle, size, &alpha, deviceVector2.getData(), 1, deviceVector.getData(), 1 ); }; #endif benchmark.setOperation( "vector addition", 3 * datasetSize ); benchmark.time( reset1, "CPU", addVectorHost, "GPU", addVectorCuda ); "GPU", addVectorCuda #ifdef HAVE_CUBLAS , "cuBLAS", addVectorCublas #endif ); auto maxHost = [&]() { Loading Loading @@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMaxCuda = [&]() { resultDevice = deviceVector.absMax(); }; #ifdef HAVE_CUBLAS auto absMaxCublas = [&]() { int index = 0; cublasIgamax( cublasHandle, size, deviceVector.getData(), 1, &index ); resultDevice = deviceVector.getElement( index ); }; #endif benchmark.setOperation( "absMax", datasetSize ); benchmark.time( reset1, "CPU", 
absMaxHost, "GPU", absMaxCuda ); "GPU", absMaxCuda #ifdef HAVE_CUBLAS , "cuBLAS", absMaxCublas #endif ); auto absMinHost = [&]() { Loading @@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMinCuda = [&]() { resultDevice = deviceVector.absMin(); }; #ifdef HAVE_CUBLAS auto absMinCublas = [&]() { int index = 0; cublasIgamin( cublasHandle, size, deviceVector.getData(), 1, &index ); resultDevice = deviceVector.getElement( index ); }; #endif benchmark.setOperation( "absMin", datasetSize ); benchmark.time( reset1, "CPU", absMinHost, "GPU", absMinCuda ); "GPU", absMinCuda #ifdef HAVE_CUBLAS , "cuBLAS", absMinCublas #endif ); auto sumHost = [&]() { Loading @@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l1normCuda = [&]() { resultDevice = deviceVector.lpNorm( 1.0 ); }; #ifdef HAVE_CUBLAS auto l1normCublas = [&]() { cublasGasum( cublasHandle, size, deviceVector.getData(), 1, &resultDevice ); }; #endif benchmark.setOperation( "l1 norm", datasetSize ); benchmark.time( reset1, "CPU", l1normHost, "GPU", l1normCuda ); "GPU", l1normCuda #ifdef HAVE_CUBLAS , "cuBLAS", l1normCublas #endif ); auto l2normHost = [&]() { Loading @@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; #ifdef HAVE_CUBLAS auto l2normCublas = [&]() { cublasGnrm2( cublasHandle, size, deviceVector.getData(), 1, &resultDevice ); }; #endif benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time( reset1, "CPU", l2normHost, "GPU", l2normCuda ); "GPU", l2normCuda #ifdef HAVE_CUBLAS , "cuBLAS", l2normCublas #endif ); auto l3normHost = [&]() { Loading Loading
// Rendered diff (no +/- markers) of tests/benchmarks/cublasWrappers.h, two
// hunks. "Loading @@ ... @@" separates them and the code in between is not
// shown, so the cublasGdot overloads appear only in part.
//
// Each wrapper is a pair of overloads selected by element type. The `float`
// overload forwards its arguments unchanged to the cuBLAS `S` routine, the
// `double` overload to the `D` routine, and the cublasStatus_t of that call is
// returned:
//   cublasIgamax -> cublasIsamax / cublasIdamax
//   cublasIgamin -> cublasIsamin / cublasIdamin
//   cublasGasum  -> cublasSasum  / cublasDasum
//   cublasGaxpy  -> cublasSaxpy  / cublasDaxpy
//   cublasGnrm2  -> cublasSnrm2  / cublasDnrm2
//   cublasGscal  -> cublasSscal  / cublasDscal
//
// NOTE(review): cublasIgamax / cublasIgamin hand back whatever index cuBLAS
// writes to `result`; the cuBLAS reference describes that index as 1-based --
// confirm that callers convert it before using it as an array offset.
tests/benchmarks/cublasWrappers.h +96 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,70 @@ #include <cublas_v2.h> inline cublasStatus_t cublasIgamax( cublasHandle_t handle, int n, const float *x, int incx, int *result ) { return cublasIsamax( handle, n, x, incx, result ); } inline cublasStatus_t cublasIgamax( cublasHandle_t handle, int n, const double *x, int incx, int *result ) { return cublasIdamax( handle, n, x, incx, result ); } inline cublasStatus_t cublasIgamin( cublasHandle_t handle, int n, const float *x, int incx, int *result ) { return cublasIsamin( handle, n, x, incx, result ); } inline cublasStatus_t cublasIgamin( cublasHandle_t handle, int n, const double *x, int incx, int *result ) { return cublasIdamin( handle, n, x, incx, result ); } inline cublasStatus_t cublasGasum( cublasHandle_t handle, int n, const float *x, int incx, float *result ) { return cublasSasum( handle, n, x, incx, result ); } inline cublasStatus_t cublasGasum( cublasHandle_t handle, int n, const double *x, int incx, double *result ) { return cublasDasum( handle, n, x, incx, result ); } inline cublasStatus_t cublasGaxpy( cublasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy ) { return cublasSaxpy( handle, n, alpha, x, incx, y, incy ); } inline cublasStatus_t cublasGaxpy( cublasHandle_t handle, int n, const double *alpha, const double *x, int incx, double *y, int incy ) { return cublasDaxpy( handle, n, alpha, x, incx, y, incy ); } inline cublasStatus_t cublasGdot( cublasHandle_t handle, int n, const float *x, int incx, Loading @@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n, return cublasDdot( handle, n, x, incx, y, incy, result ); } inline cublasStatus_t cublasGnrm2( cublasHandle_t handle, int n, const float *x, int incx, float *result ) { return cublasSnrm2( handle, n, x, incx, result ); } inline cublasStatus_t cublasGnrm2( cublasHandle_t handle, int n, const double *x, int incx, double *result ) { return 
cublasDnrm2( handle, n, x, incx, result ); } inline cublasStatus_t cublasGscal( cublasHandle_t handle, int n, const float *alpha, float *x, int incx ) { return cublasSscal( handle, n, alpha, x, incx ); } inline cublasStatus_t cublasGscal( cublasHandle_t handle, int n, const double *alpha, double *x, int incx ) { return cublasDscal( handle, n, alpha, x, incx ); } #endif #endif
// Rendered diff (no +/- markers) of tests/benchmarks/vector-operations.h,
// inside benchmarkVectorOperations (named by the hunk headers).
// "Loading @@ ... @@" separates hunks; code between hunks, including the
// bodies of the host lambdas, is not shown.
//
// Under #ifdef HAVE_CUBLAS each hunk defines a lambda that runs a cuBLAS
// wrapper on deviceVector.getData() with stride 1 and passes it to
// benchmark.time as an extra "cuBLAS" entry:
//   scalar multiplication: cublasGscal, alpha = 0.5
//   vector addition:       cublasGaxpy, alpha = 1.0, x = deviceVector2, y = deviceVector
//   absMax / absMin:       cublasIgamax / cublasIgamin, then getElement( index )
//   l1 norm / l2 norm:     cublasGasum / cublasGnrm2, result written to resultDevice
// The repeated `"GPU", xCuda` text after each `benchmark.time(` is presumably
// the old line followed by its replacement.
//
// NOTE(review): the cuBLAS reference describes the index written by
// cublasI<t>amax / cublasI<t>amin as 1-based; absMaxCublas / absMinCublas pass
// it straight to deviceVector.getElement( index ) -- presumably this should be
// index - 1; confirm against getElement's indexing.
// NOTE(review): getElement yields the element itself, not its magnitude, so
// resultDevice may differ in sign from absMax() / absMin() -- confirm.
// NOTE(review): the cublasStatus_t returned by every wrapper call in these
// lambdas is discarded.
tests/benchmarks/vector-operations.h +79 −6 Original line number Diff line number Diff line Loading @@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark, auto multiplyCuda = [&]() { deviceVector *= 0.5; }; #ifdef HAVE_CUBLAS auto multiplyCublas = [&]() { const Real alpha = 0.5; cublasGscal( cublasHandle, size, &alpha, deviceVector.getData(), 1 ); }; #endif benchmark.setOperation( "scalar multiplication", 2 * datasetSize ); benchmark.time( reset1, "CPU", multiplyHost, "GPU", multiplyCuda ); "GPU", multiplyCuda #ifdef HAVE_CUBLAS , "cuBLAS", multiplyCublas #endif ); auto addVectorHost = [&]() { Loading @@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark, auto addVectorCuda = [&]() { deviceVector.addVector( deviceVector2 ); }; #ifdef HAVE_CUBLAS auto addVectorCublas = [&]() { const Real alpha = 1.0; cublasGaxpy( cublasHandle, size, &alpha, deviceVector2.getData(), 1, deviceVector.getData(), 1 ); }; #endif benchmark.setOperation( "vector addition", 3 * datasetSize ); benchmark.time( reset1, "CPU", addVectorHost, "GPU", addVectorCuda ); "GPU", addVectorCuda #ifdef HAVE_CUBLAS , "cuBLAS", addVectorCublas #endif ); auto maxHost = [&]() { Loading Loading @@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMaxCuda = [&]() { resultDevice = deviceVector.absMax(); }; #ifdef HAVE_CUBLAS auto absMaxCublas = [&]() { int index = 0; cublasIgamax( cublasHandle, size, deviceVector.getData(), 1, &index ); resultDevice = deviceVector.getElement( index ); }; #endif benchmark.setOperation( "absMax", datasetSize ); benchmark.time( reset1, "CPU", absMaxHost, "GPU", absMaxCuda ); "GPU", absMaxCuda #ifdef HAVE_CUBLAS , "cuBLAS", absMaxCublas #endif ); auto absMinHost = [&]() { Loading @@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMinCuda = [&]() { resultDevice = deviceVector.absMin(); }; #ifdef HAVE_CUBLAS auto absMinCublas = [&]() { int index = 0; cublasIgamin( cublasHandle, size, 
deviceVector.getData(), 1, &index ); resultDevice = deviceVector.getElement( index ); }; #endif benchmark.setOperation( "absMin", datasetSize ); benchmark.time( reset1, "CPU", absMinHost, "GPU", absMinCuda ); "GPU", absMinCuda #ifdef HAVE_CUBLAS , "cuBLAS", absMinCublas #endif ); auto sumHost = [&]() { Loading @@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l1normCuda = [&]() { resultDevice = deviceVector.lpNorm( 1.0 ); }; #ifdef HAVE_CUBLAS auto l1normCublas = [&]() { cublasGasum( cublasHandle, size, deviceVector.getData(), 1, &resultDevice ); }; #endif benchmark.setOperation( "l1 norm", datasetSize ); benchmark.time( reset1, "CPU", l1normHost, "GPU", l1normCuda ); "GPU", l1normCuda #ifdef HAVE_CUBLAS , "cuBLAS", l1normCublas #endif ); auto l2normHost = [&]() { Loading @@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; #ifdef HAVE_CUBLAS auto l2normCublas = [&]() { cublasGnrm2( cublasHandle, size, deviceVector.getData(), 1, &resultDevice ); }; #endif benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time( reset1, "CPU", l2normHost, "GPU", l2normCuda ); "GPU", l2normCuda #ifdef HAVE_CUBLAS , "cuBLAS", l2normCublas #endif ); auto l3normHost = [&]() { Loading