Commit 1f26cbe9 authored by Jakub Klinkovský
Browse files

Merge branch 'JK/workarounds' into 'develop'

Workarounds

See merge request !140
parents bc5dd706 79ca9b90
Loading
Loading
Loading
Loading
+5 −1
Original line number Original line Diff line number Diff line
@@ -202,7 +202,7 @@ endif()
# Check for CUDA
# Check for CUDA
#
#
if( ${WITH_CUDA} )
if( ${WITH_CUDA} )
    find_package( CUDA 9.0 )
    find_package( CUDA 10.0 )
    if( CUDA_FOUND )
    if( CUDA_FOUND )
        set( BUILD_CUDA TRUE)
        set( BUILD_CUDA TRUE)
        set(CUDA_SEPARABLE_COMPILATION ON)
        set(CUDA_SEPARABLE_COMPILATION ON)
@@ -229,6 +229,10 @@ if( ${WITH_CUDA} )
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number)
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number)
        # This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
        # This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored)
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored)
        # nvcc 10 causes many invalid VLA errors in the host code, so -Wno-vla
        # is forwarded to the host compiler (via -Xcompiler) for that release only.
        # The version expansion is quoted so that if() stays well-formed even
        # when CUDA_VERSION_MAJOR happens to be empty.
        if( "${CUDA_VERSION_MAJOR}" STREQUAL "10" )
            list( APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-vla )
        endif()
        # Select GPU architecture
        # Select GPU architecture
        ## cmake bug: cuda_select_nvcc_arch_flags does not work with CMAKE_EXECUTABLE_SUFFIX
        ## cmake bug: cuda_select_nvcc_arch_flags does not work with CMAKE_EXECUTABLE_SUFFIX
        ## see https://gitlab.kitware.com/cmake/cmake/issues/19636
        ## see https://gitlab.kitware.com/cmake/cmake/issues/19636
+3 −42
Original line number Original line Diff line number Diff line
@@ -302,20 +302,7 @@ Reduction< Devices::Cuda >::reduce( const Index begin, const Index end, Fetch&&


   if( can_reduce_later_on_host ) {
   if( can_reduce_later_on_host ) {
      // transfer the reduced data from device to host
      // transfer the reduced data from device to host
      std::unique_ptr< Result[] > resultArray{
      std::unique_ptr< Result[] > resultArray{ new Result[ reducedSize ] };
// Workaround for nvcc 10.1.168 - it would modify the simple expression
// `new Result[reducedSize]` in the source code to `new (Result[reducedSize])`
// which is not correct - see e.g. https://stackoverflow.com/a/39671946
// Thus, the host compiler would spit out hundreds of warnings...
// Funnily enough, nvcc's behaviour depends on the context rather than the
// expression, because exactly the same simple expression in different places
// does not produce warnings.
#ifdef __NVCC__
         new Result[ static_cast< const int& >( reducedSize ) ]
#else
         new Result[ reducedSize ]
#endif
      };
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );


#ifdef CUDA_REDUCTION_PROFILING
#ifdef CUDA_REDUCTION_PROFILING
@@ -392,34 +379,8 @@ Reduction< Devices::Cuda >::reduceWithArgument( const Index begin,


   if( can_reduce_later_on_host ) {
   if( can_reduce_later_on_host ) {
      // transfer the reduced data from device to host
      // transfer the reduced data from device to host
      std::unique_ptr< Result[] > resultArray{
      std::unique_ptr< Result[] > resultArray{ new Result[ reducedSize ] };
// Workaround for nvcc 10.1.168 - it would modify the simple expression
      std::unique_ptr< Index[] > indexArray{ new Index[ reducedSize ] };
// `new Result[reducedSize]` in the source code to `new (Result[reducedSize])`
// which is not correct - see e.g. https://stackoverflow.com/a/39671946
// Thus, the host compiler would spit out hundreds of warnings...
// Funnily enough, nvcc's behaviour depends on the context rather than the
// expression, because exactly the same simple expression in different places
// does not produce warnings.
#ifdef __NVCC__
         new Result[ static_cast< const int& >( reducedSize ) ]
#else
         new Result[ reducedSize ]
#endif
      };
      std::unique_ptr< Index[] > indexArray{
// Workaround for nvcc 10.1.168 - it would modify the simple expression
// `new Index[reducedSize]` in the source code to `new (Index[reducedSize])`
// which is not correct - see e.g. https://stackoverflow.com/a/39671946
// Thus, the host compiler would spit out hundreds of warnings...
// Funnily enough, nvcc's behaviour depends on the context rather than the
// expression, because exactly the same simple expression in different places
// does not produce warnings.
#ifdef __NVCC__
         new Index[ static_cast< const int& >( reducedSize ) ]
#else
         new Index[ reducedSize ]
#endif
      };
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize );