Added more cuBLAS benchmarks (743414e0) · Commits · TNL / tnl-dev

tests/benchmarks/cublasWrappers.h

+96 −0

Original line number	Diff line number	Diff line
		@@ -5,6 +5,70 @@

		#include <cublas_v2.h>

		// Generic-precision wrapper, float overload: forwards all arguments to
		// cublasIsamax and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the index found by cuBLAS; per the
		//            cuBLAS documentation this index is 1-based
		inline cublasStatus_t
		cublasIgamax( cublasHandle_t handle, int n,
		              const float *x, int incx, int *result )
		{
		    return cublasIsamax( handle, n, x, incx, result );
		}

		// Generic-precision wrapper, double overload: forwards all arguments to
		// cublasIdamax and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the index found by cuBLAS; per the
		//            cuBLAS documentation this index is 1-based
		inline cublasStatus_t
		cublasIgamax( cublasHandle_t handle, int n,
		              const double *x, int incx, int *result )
		{
		    return cublasIdamax( handle, n, x, incx, result );
		}


		// Generic-precision wrapper, float overload: forwards all arguments to
		// cublasIsamin and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the index found by cuBLAS; per the
		//            cuBLAS documentation this index is 1-based
		inline cublasStatus_t
		cublasIgamin( cublasHandle_t handle, int n,
		              const float *x, int incx, int *result )
		{
		    return cublasIsamin( handle, n, x, incx, result );
		}

		// Generic-precision wrapper, double overload: forwards all arguments to
		// cublasIdamin and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the index found by cuBLAS; per the
		//            cuBLAS documentation this index is 1-based
		inline cublasStatus_t
		cublasIgamin( cublasHandle_t handle, int n,
		              const double *x, int incx, int *result )
		{
		    return cublasIdamin( handle, n, x, incx, result );
		}


		// Generic-precision wrapper, float overload: forwards all arguments to
		// cublasSasum and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the value computed by cuBLAS
		inline cublasStatus_t
		cublasGasum( cublasHandle_t handle, int n,
		             const float *x, int incx, float *result )
		{
		    return cublasSasum( handle, n, x, incx, result );
		}

		// Generic-precision wrapper, double overload: forwards all arguments to
		// cublasDasum and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the value computed by cuBLAS
		inline cublasStatus_t
		cublasGasum( cublasHandle_t handle, int n,
		             const double *x, int incx, double *result )
		{
		    return cublasDasum( handle, n, x, incx, result );
		}


		// Generic-precision axpy wrapper, float overload. Every argument is
		// handed to cublasSaxpy unchanged and the status it reports is returned.
		inline cublasStatus_t
		cublasGaxpy( cublasHandle_t handle, int n,
		             const float *alpha,
		             const float *x, int incx,
		             float *y, int incy )
		{
		    const cublasStatus_t status =
		        cublasSaxpy( handle, n, alpha, x, incx, y, incy );
		    return status;
		}

		// Generic-precision axpy wrapper, double overload. Every argument is
		// handed to cublasDaxpy unchanged and the status it reports is returned.
		inline cublasStatus_t
		cublasGaxpy( cublasHandle_t handle, int n,
		             const double *alpha,
		             const double *x, int incx,
		             double *y, int incy )
		{
		    const cublasStatus_t status =
		        cublasDaxpy( handle, n, alpha, x, incx, y, incy );
		    return status;
		}


		inline cublasStatus_t
		cublasGdot( cublasHandle_t handle, int n,
		const float *x, int incx,
		@@ -23,5 +87,37 @@ cublasGdot( cublasHandle_t handle, int n,
		return cublasDdot( handle, n, x, incx, y, incy, result );
		}


		// Generic-precision wrapper, float overload: forwards all arguments to
		// cublasSnrm2 and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the value computed by cuBLAS
		inline cublasStatus_t
		cublasGnrm2( cublasHandle_t handle, int n,
		             const float *x, int incx, float *result )
		{
		    return cublasSnrm2( handle, n, x, incx, result );
		}

		// Generic-precision wrapper, double overload: forwards all arguments to
		// cublasDnrm2 and returns its status.
		//   x      - pointer to the input vector (n elements, stride incx)
		//   result - pointer that receives the value computed by cuBLAS
		inline cublasStatus_t
		cublasGnrm2( cublasHandle_t handle, int n,
		             const double *x, int incx, double *result )
		{
		    return cublasDnrm2( handle, n, x, incx, result );
		}


		// Generic-precision scal wrapper, float overload. Every argument is
		// handed to cublasSscal unchanged and the status it reports is returned.
		inline cublasStatus_t
		cublasGscal( cublasHandle_t handle, int n,
		             const float *alpha,
		             float *x, int incx )
		{
		    const cublasStatus_t status = cublasSscal( handle, n, alpha, x, incx );
		    return status;
		}

		// Generic-precision scal wrapper, double overload. Every argument is
		// handed to cublasDscal unchanged and the status it reports is returned.
		inline cublasStatus_t
		cublasGscal( cublasHandle_t handle, int n,
		             const double *alpha,
		             double *x, int incx )
		{
		    const cublasStatus_t status = cublasDscal( handle, n, alpha, x, incx );
		    return status;
		}

		#endif
		#endif

tests/benchmarks/vector-operations.h

+79 −6

Original line number	Diff line number	Diff line
		@@ -71,10 +71,22 @@ benchmarkVectorOperations( Benchmark & benchmark,
		// Scalar-multiplication variant timed under the "GPU" label: multiplies
		// deviceVector by 0.5 through the vector's own operator*=.
		auto multiplyCuda = [&]() {
		deviceVector *= 0.5;
		};
		#ifdef HAVE_CUBLAS
		// Scalar-multiplication variant timed under the "cuBLAS" label: passes
		// deviceVector's data pointer and the factor 0.5 to the cublasGscal
		// wrapper with unit stride. The returned status is not inspected.
		auto multiplyCublas = [&]() {
		    const Real factor = 0.5;
		    const int stride = 1;
		    cublasGscal( cublasHandle, size, &factor, deviceVector.getData(), stride );
		};
		#endif
		// Register the "scalar multiplication" operation and time every variant.
		// reset1 is passed first, followed by (label, callable) pairs; the cuBLAS
		// pair is part of the call only when HAVE_CUBLAS is defined.
		benchmark.setOperation( "scalar multiplication", 2 * datasetSize );
		benchmark.time( reset1,
		                "CPU", multiplyHost,
		                "GPU", multiplyCuda
		#ifdef HAVE_CUBLAS
		                , "cuBLAS", multiplyCublas
		#endif
		              );


		auto addVectorHost = [&]() {
		@@ -83,10 +95,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
		// Vector-addition variant timed under the "GPU" label: calls
		// deviceVector.addVector with deviceVector2 as the argument.
		auto addVectorCuda = [&]() {
		deviceVector.addVector( deviceVector2 );
		};
		#ifdef HAVE_CUBLAS
		// Vector-addition variant timed under the "cuBLAS" label: calls the
		// cublasGaxpy wrapper with coefficient 1.0, deviceVector2 as x and
		// deviceVector as y, both with unit stride. The returned status is not
		// inspected.
		auto addVectorCublas = [&]() {
		    const Real coefficient = 1.0;
		    const int stride = 1;
		    cublasGaxpy( cublasHandle, size, &coefficient,
		                 deviceVector2.getData(), stride,
		                 deviceVector.getData(), stride );
		};
		#endif
		// Register the "vector addition" operation and time every variant.
		// reset1 is passed first, followed by (label, callable) pairs; the cuBLAS
		// pair is part of the call only when HAVE_CUBLAS is defined.
		benchmark.setOperation( "vector addition", 3 * datasetSize );
		benchmark.time( reset1,
		                "CPU", addVectorHost,
		                "GPU", addVectorCuda
		#ifdef HAVE_CUBLAS
		                , "cuBLAS", addVectorCublas
		#endif
		              );


		auto maxHost = [&]() {
		@@ -119,10 +144,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
		// absMax variant timed under the "GPU" label: stores the value returned
		// by deviceVector.absMax() in resultDevice.
		auto absMaxCuda = [&]() {
		resultDevice = deviceVector.absMax();
		};
		#ifdef HAVE_CUBLAS
		// absMax variant timed under the "cuBLAS" label: asks cuBLAS (through the
		// cublasIgamax wrapper) for the position of the element with the largest
		// magnitude, then reads that element back into resultDevice.
		auto absMaxCublas = [&]() {
		    int index = 0;
		    cublasIgamax( cublasHandle, size,
		                  deviceVector.getData(), 1,
		                  &index );
		    // cuBLAS reports the position with 1-based (Fortran-style) indexing,
		    // so it has to be shifted before it is used as an element index.
		    // An index of 0 means nothing was found (e.g. empty input or a failed
		    // call); resultDevice is left untouched in that case.
		    // NOTE(review): the element is stored with its sign, whereas absMax
		    // presumably yields a magnitude -- confirm whether abs() is wanted.
		    if( index > 0 )
		        resultDevice = deviceVector.getElement( index - 1 );
		};
		#endif
		// Register the "absMax" operation and time every variant. reset1 is
		// passed first, followed by (label, callable) pairs; the cuBLAS pair is
		// part of the call only when HAVE_CUBLAS is defined.
		benchmark.setOperation( "absMax", datasetSize );
		benchmark.time( reset1,
		                "CPU", absMaxHost,
		                "GPU", absMaxCuda
		#ifdef HAVE_CUBLAS
		                , "cuBLAS", absMaxCublas
		#endif
		              );


		auto absMinHost = [&]() {
		@@ -131,10 +169,23 @@ benchmarkVectorOperations( Benchmark & benchmark,
		// absMin variant timed under the "GPU" label: stores the value returned
		// by deviceVector.absMin() in resultDevice.
		auto absMinCuda = [&]() {
		resultDevice = deviceVector.absMin();
		};
		#ifdef HAVE_CUBLAS
		// absMin variant timed under the "cuBLAS" label: asks cuBLAS (through the
		// cublasIgamin wrapper) for the position of the element with the smallest
		// magnitude, then reads that element back into resultDevice.
		auto absMinCublas = [&]() {
		    int index = 0;
		    cublasIgamin( cublasHandle, size,
		                  deviceVector.getData(), 1,
		                  &index );
		    // cuBLAS reports the position with 1-based (Fortran-style) indexing,
		    // so it has to be shifted before it is used as an element index.
		    // An index of 0 means nothing was found (e.g. empty input or a failed
		    // call); resultDevice is left untouched in that case.
		    // NOTE(review): the element is stored with its sign, whereas absMin
		    // presumably yields a magnitude -- confirm whether abs() is wanted.
		    if( index > 0 )
		        resultDevice = deviceVector.getElement( index - 1 );
		};
		#endif
		// Register the "absMin" operation and time every variant. reset1 is
		// passed first, followed by (label, callable) pairs; the cuBLAS pair is
		// part of the call only when HAVE_CUBLAS is defined.
		benchmark.setOperation( "absMin", datasetSize );
		benchmark.time( reset1,
		                "CPU", absMinHost,
		                "GPU", absMinCuda
		#ifdef HAVE_CUBLAS
		                , "cuBLAS", absMinCublas
		#endif
		              );


		auto sumHost = [&]() {
		@@ -155,10 +206,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
		// l1 norm variant timed under the "GPU" label: stores the value returned
		// by deviceVector.lpNorm( 1.0 ) in resultDevice.
		auto l1normCuda = [&]() {
		resultDevice = deviceVector.lpNorm( 1.0 );
		};
		#ifdef HAVE_CUBLAS
		// l1 norm variant timed under the "cuBLAS" label: calls the cublasGasum
		// wrapper on deviceVector's data with unit stride and lets it write the
		// result straight into resultDevice. The returned status is not inspected.
		auto l1normCublas = [&]() {
		    const int stride = 1;
		    cublasGasum( cublasHandle, size, deviceVector.getData(), stride, &resultDevice );
		};
		#endif
		// Register the "l1 norm" operation and time every variant. reset1 is
		// passed first, followed by (label, callable) pairs; the cuBLAS pair is
		// part of the call only when HAVE_CUBLAS is defined.
		benchmark.setOperation( "l1 norm", datasetSize );
		benchmark.time( reset1,
		                "CPU", l1normHost,
		                "GPU", l1normCuda
		#ifdef HAVE_CUBLAS
		                , "cuBLAS", l1normCublas
		#endif
		              );


		auto l2normHost = [&]() {
		@@ -167,10 +229,21 @@ benchmarkVectorOperations( Benchmark & benchmark,
		// l2 norm variant timed under the "GPU" label: stores the value returned
		// by deviceVector.lpNorm( 2.0 ) in resultDevice.
		auto l2normCuda = [&]() {
		resultDevice = deviceVector.lpNorm( 2.0 );
		};
		#ifdef HAVE_CUBLAS
		// l2 norm variant timed under the "cuBLAS" label: calls the cublasGnrm2
		// wrapper on deviceVector's data with unit stride and lets it write the
		// result straight into resultDevice. The returned status is not inspected.
		auto l2normCublas = [&]() {
		    const int stride = 1;
		    cublasGnrm2( cublasHandle, size, deviceVector.getData(), stride, &resultDevice );
		};
		#endif
		// Register the "l2 norm" operation and time every variant. reset1 is
		// passed first, followed by (label, callable) pairs; the cuBLAS pair is
		// part of the call only when HAVE_CUBLAS is defined.
		benchmark.setOperation( "l2 norm", datasetSize );
		benchmark.time( reset1,
		                "CPU", l2normHost,
		                "GPU", l2normCuda
		#ifdef HAVE_CUBLAS
		                , "cuBLAS", l2normCublas
		#endif
		              );


		auto l3normHost = [&]() {