Avoiding compiler warnings for builds without CUDA (7390a03b) · Commits · TNL / tnl-dev

src/Benchmarks/BLAS/array-operations.h

+16 −16

Original line number	Diff line number	Diff line
		@@ -69,12 +69,12 @@ benchmarkArrayOperations( Benchmark & benchmark,
		auto compareHost = [&]() {
		resultHost = (int) ( hostArray == hostArray2 );
		};
		auto compareCuda = [&]() {
		resultDevice = (int) ( deviceArray == deviceArray2 );
		};
		benchmark.setOperation( "comparison (operator==)", 2 * datasetSize );
		benchmark.time< Devices::Host >( reset1, "CPU", compareHost );
		#ifdef HAVE_CUDA
		auto compareCuda = [&]() {
		resultDevice = (int) ( deviceArray == deviceArray2 );
		};
		benchmark.time< Devices::Cuda >( reset1, "GPU", compareCuda );
		#endif

		@@ -82,25 +82,25 @@ benchmarkArrayOperations( Benchmark & benchmark,
		auto copyAssignHostHost = [&]() {
		hostArray = hostArray2;
		};
		auto copyAssignCudaCuda = [&]() {
		deviceArray = deviceArray2;
		};
		benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
		// copyBasetime is used later inside HAVE_CUDA guard, so the compiler will
		// complain when compiling without CUDA
		const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost );
		#ifdef HAVE_CUDA
		auto copyAssignCudaCuda = [&]() {
		deviceArray = deviceArray2;
		};
		benchmark.time< Devices::Cuda >( reset1, "GPU", copyAssignCudaCuda );
		#endif


		#ifdef HAVE_CUDA
		auto copyAssignHostCuda = [&]() {
		deviceArray = hostArray;
		};
		auto copyAssignCudaHost = [&]() {
		hostArray = deviceArray;
		};
		#ifdef HAVE_CUDA
		benchmark.setOperation( "copy (operator=)", datasetSize, copyBasetime );
		benchmark.time< Devices::Cuda >( reset1, "CPU->GPU", copyAssignHostCuda );
		benchmark.time< Devices::Cuda >( reset1, "GPU->CPU", copyAssignCudaHost );
		@@ -110,12 +110,12 @@ benchmarkArrayOperations( Benchmark & benchmark,
		auto setValueHost = [&]() {
		hostArray.setValue( 3.0 );
		};
		auto setValueCuda = [&]() {
		deviceArray.setValue( 3.0 );
		};
		benchmark.setOperation( "setValue", datasetSize );
		benchmark.time< Devices::Host >( reset1, "CPU", setValueHost );
		#ifdef HAVE_CUDA
		auto setValueCuda = [&]() {
		deviceArray.setValue( 3.0 );
		};
		benchmark.time< Devices::Cuda >( reset1, "GPU", setValueCuda );
		#endif

		@@ -123,9 +123,6 @@ benchmarkArrayOperations( Benchmark & benchmark,
		auto setSizeHost = [&]() {
		hostArray.setSize( size );
		};
		auto setSizeCuda = [&]() {
		deviceArray.setSize( size );
		};
		auto resetSize1 = [&]() {
		hostArray.reset();
		#ifdef HAVE_CUDA
		@@ -135,6 +132,9 @@ benchmarkArrayOperations( Benchmark & benchmark,
		benchmark.setOperation( "allocation (setSize)", datasetSize );
		benchmark.time< Devices::Host >( resetSize1, "CPU", setSizeHost );
		#ifdef HAVE_CUDA
		auto setSizeCuda = [&]() {
		deviceArray.setSize( size );
		};
		benchmark.time< Devices::Cuda >( resetSize1, "GPU", setSizeCuda );
		#endif

		@@ -142,9 +142,6 @@ benchmarkArrayOperations( Benchmark & benchmark,
		auto resetSizeHost = [&]() {
		hostArray.reset();
		};
		auto resetSizeCuda = [&]() {
		deviceArray.reset();
		};
		auto setSize1 = [&]() {
		hostArray.setSize( size );
		#ifdef HAVE_CUDA
		@@ -154,6 +151,9 @@ benchmarkArrayOperations( Benchmark & benchmark,
		benchmark.setOperation( "deallocation (reset)", datasetSize );
		benchmark.time< Devices::Host >( setSize1, "CPU", resetSizeHost );
		#ifdef HAVE_CUDA
		auto resetSizeCuda = [&]() {
		deviceArray.reset();
		};
		benchmark.time< Devices::Cuda >( setSize1, "GPU", resetSizeCuda );
		#endif
		}

src/Benchmarks/BLAS/spmv.h

+3 −4

Original line number	Diff line number	Diff line
		@@ -155,13 +155,12 @@ benchmarkSpMV( Benchmark & benchmark,
		auto spmvHost = [&]() {
		hostMatrix.vectorProduct( hostVector, hostVector2 );
		};
		auto spmvCuda = [&]() {
		deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
		};

		benchmark.setOperation( datasetSize );
		benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
		#ifdef HAVE_CUDA
		auto spmvCuda = [&]() {
		deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
		};
		benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
		#endif
		}

src/Benchmarks/BLAS/vector-operations.h

+134 −153

File changed.

Preview size limit exceeded, changes collapsed.

src/TNL/Containers/ndarray/SizesHolderHelpers.h

+4 −2

Original line number	Diff line number	Diff line
		@@ -184,7 +184,8 @@ struct SetSizesCopyHelper
		target.template setSize< level >( source.template getSize< level >() );
		SetSizesCopyHelper< TargetHolder, SourceHolder, level - 1 >::copy( target, source );
		}
		else if( target.template getStaticSize< level >() != source.template getSize< level >() )
		else if( source.template getSize< level >() < 0 ||
		target.template getStaticSize< level >() != (std::size_t) source.template getSize< level >() )
		throw std::logic_error( "Cannot copy sizes due to inconsistent underlying types (static sizes don't match)." );
		}
		};
		@@ -198,7 +199,8 @@ struct SetSizesCopyHelper< TargetHolder, SourceHolder, 0 >
		{
		if( target.template getStaticSize< 0 >() == 0 )
		target.template setSize< 0 >( source.template getSize< 0 >() );
		else if( target.template getStaticSize< 0 >() != source.template getSize< 0 >() )
		else if( source.template getSize< 0 >() ||
		target.template getStaticSize< 0 >() != (std::size_t) source.template getSize< 0 >() )
		throw std::logic_error( "Cannot copy sizes due to inconsistent underlying types (static sizes don't match)." );
		}
		};

src/TNL/Devices/Cuda_impl.h

+5 −2

Original line number	Diff line number	Diff line
		@@ -294,6 +294,7 @@ __device__ Element* Cuda::getSharedMemory()
		{
		return CudaSharedMemory< Element >();
		}
		#endif

		#ifdef HAVE_CUDA
		inline void Cuda::checkDevice( const char* file_name, int line, cudaError error )
		@@ -326,6 +327,8 @@ inline bool Cuda::synchronizeDevice( int deviceId )
		getSmartPointersSynchronizationTimer().stop();
		return b;
		#endif
		#else
		return true;
		#endif
		}

		@@ -353,6 +356,7 @@ namespace {

		// double-precision atomicAdd function for Maxwell and older GPUs
		// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
		#ifdef HAVE_CUDA
		#if __CUDA_ARCH__ < 600
		namespace {
		__device__ double atomicAdd(double* address, double val)
		@@ -374,8 +378,7 @@ namespace {
		}
		} // namespace
		#endif

		#endif /* HAVE_CUDA */
		#endif

		} // namespace Devices
		} // namespace TNL