Merge branch 'JK/workarounds' into 'develop' (1f26cbe9) · Commits · TNL / tnl-dev

CMakeLists.txt

+5 −1

Original line number	Original line	Diff line number	Diff line
	@@ -202,7 +202,7 @@ endif()
	# Check for CUDA		# Check for CUDA
	#		#
	if( ${WITH_CUDA} )		if( ${WITH_CUDA} )
	find_package( CUDA 9.0 )		find_package( CUDA 10.0 )
	if( CUDA_FOUND )		if( CUDA_FOUND )
	set( BUILD_CUDA TRUE)		set( BUILD_CUDA TRUE)
	set(CUDA_SEPARABLE_COMPILATION ON)		set(CUDA_SEPARABLE_COMPILATION ON)
	@@ -229,6 +229,10 @@ if( ${WITH_CUDA} )
	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number)		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number)
	# This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470		# This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored)		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored)
			# nvcc 10 causes many invalid VLA errors in the host code, so pass
			# -Wno-vla through to the host compiler (-Xcompiler) for that release only.
			# The variable name is left unexpanded on purpose: if() dereferences it
			# itself, so an empty or undefined CUDA_VERSION_MAJOR evaluates to false
			# instead of producing a malformed condition, and EQUAL compares the
			# major version numerically rather than as a string.
			if( CUDA_VERSION_MAJOR EQUAL 10 )
			    list( APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-vla )
			endif()
	# Select GPU architecture		# Select GPU architecture
	## cmake bug: cuda_select_nvcc_arch_flags does not work with CMAKE_EXECUTABLE_SUFFIX		## cmake bug: cuda_select_nvcc_arch_flags does not work with CMAKE_EXECUTABLE_SUFFIX
	## see https://gitlab.kitware.com/cmake/cmake/issues/19636		## see https://gitlab.kitware.com/cmake/cmake/issues/19636

src/TNL/Algorithms/detail/Reduction.hpp

+3 −42

Original line number	Original line	Diff line number	Diff line
	@@ -302,20 +302,7 @@ Reduction< Devices::Cuda >::reduce( const Index begin, const Index end, Fetch&&

	if( can_reduce_later_on_host ) {		if( can_reduce_later_on_host ) {
	// transfer the reduced data from device to host		// transfer the reduced data from device to host
	std::unique_ptr< Result[] > resultArray{		std::unique_ptr< Result[] > resultArray{ new Result[ reducedSize ] };
	// Workaround for nvcc 10.1.168 - it would modify the simple expression
	// `new Result[reducedSize]` in the source code to `new (Result[reducedSize])`
	// which is not correct - see e.g. https://stackoverflow.com/a/39671946
	// Thus, the host compiler would spit out hundreds of warnings...
	// Funnily enough, nvcc's behaviour depends on the context rather than the
	// expression, because exactly the same simple expression in different places
	// does not produce warnings.
	#ifdef __NVCC__
	new Result[ static_cast< const int& >( reducedSize ) ]
	#else
	new Result[ reducedSize ]
	#endif
	};
	MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );		MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );

	#ifdef CUDA_REDUCTION_PROFILING		#ifdef CUDA_REDUCTION_PROFILING
	@@ -392,34 +379,8 @@ Reduction< Devices::Cuda >::reduceWithArgument( const Index begin,

	if( can_reduce_later_on_host ) {		if( can_reduce_later_on_host ) {
	// transfer the reduced data from device to host		// transfer the reduced data from device to host
	std::unique_ptr< Result[] > resultArray{		std::unique_ptr< Result[] > resultArray{ new Result[ reducedSize ] };
	// Workaround for nvcc 10.1.168 - it would modify the simple expression		std::unique_ptr< Index[] > indexArray{ new Index[ reducedSize ] };
	// `new Result[reducedSize]` in the source code to `new (Result[reducedSize])`
	// which is not correct - see e.g. https://stackoverflow.com/a/39671946
	// Thus, the host compiler would spit out hundreds of warnings...
	// Funnily enough, nvcc's behaviour depends on the context rather than the
	// expression, because exactly the same simple expression in different places
	// does not produce warnings.
	#ifdef __NVCC__
	new Result[ static_cast< const int& >( reducedSize ) ]
	#else
	new Result[ reducedSize ]
	#endif
	};
	std::unique_ptr< Index[] > indexArray{
	// Workaround for nvcc 10.1.168 - it would modify the simple expression
	// `new Index[reducedSize]` in the source code to `new (Index[reducedSize])`
	// which is not correct - see e.g. https://stackoverflow.com/a/39671946
	// Thus, the host compiler would spit out hundreds of warnings...
	// Funnily enough, nvcc's behaviour depends on the context rather than the
	// expression, because exactly the same simple expression in different places
	// does not produce warnings.
	#ifdef __NVCC__
	new Index[ static_cast< const int& >( reducedSize ) ]
	#else
	new Index[ reducedSize ]
	#endif
	};
	MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );		MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
	MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize );		MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize );