Commit 54c1655a authored by Jakub Klinkovský
Browse files

Merge branch 'cuda-11.2' into 'develop'

CUDA 11.2

See merge request !78
parents 4ff2ce34 6bf315c3
Loading
Loading
Loading
Loading
+7 −5
Original line number Diff line number Diff line
@@ -210,11 +210,13 @@ if( ${WITH_CUDA} )
               set( CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} )
            endif()
        endif()
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ;-DHAVE_CUDA)
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -DHAVE_CUDA --expt-relaxed-constexpr --expt-extended-lambda)
        # disable false compiler warnings
        #   reference for the -Xcudafe --diag_suppress and --display_error_number flags: https://stackoverflow.com/a/54142937
        #   incomplete list of tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --diag_suppress=2906 -Xcudafe --diag_suppress=2913 -Xcudafe --diag_suppress=2886 -Xcudafe --diag_suppress=2929 -Xcudafe --diag_suppress=2977 -Xcudafe --diag_suppress=3057 -Xcudafe --diag_suppress=3124 -Xcudafe --display_error_number)
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number)
        # This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored)
        # temporarily disable host-compiler warnings about VLAs, which are caused by nvcc's modifications to the source code
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcompiler -Wno-vla)
        # Select GPU architecture
+0 −7
Original line number Diff line number Diff line
@@ -40,13 +40,6 @@ DenseMatrixView( const IndexType rows,
                 const ValuesViewType& values )
 : MatrixView< Real, Device, Index >( rows, columns, values )
{
#ifdef __CUDA_ARCH__
   TNL_ASSERT_EQ( values.getSize(), this->getAllocatedElementsCount(), "Number of matrix elements does not agree with matrix dimensions." );
#else
   if( values.getSize() != this->getAllocatedElementsCount() )
      throw( std::logic_error( "Number of matrix elements does not agree with matrix dimensions." ) );
#endif

   SegmentsType a( rows, columns );
   segments = a.getView();
}
+48 −44
Original line number Diff line number Diff line
@@ -1310,9 +1310,6 @@ void test_VectorProduct()

   /////
   // Large test
   if( ( std::is_same< IndexType, int >::value || std::is_same< IndexType, long int >::value ) &&
      std::is_same< RealType, double >::value )
   {
   const IndexType size( 35 );
   //for( int size = 1; size < 1000; size++ )
   {
@@ -1330,6 +1327,10 @@ void test_VectorProduct()
         }
      };
      m1.forAllRows( f1 );
      // check that the matrix was initialized
      m1.getCompressedRowLengths( rowCapacities );
      EXPECT_EQ( rowCapacities, 1 );

      TNL::Containers::Vector< double, DeviceType, IndexType > in( size, 1.0 ), out( size, 0.0 );
      m1.vectorProduct( in, out );
      //std::cerr << out << std::endl;
@@ -1348,13 +1349,16 @@ void test_VectorProduct()
         }
      };
      m2.forAllRows( f2 );
      // check that the matrix was initialized
      TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowLengths( size );
      m2.getCompressedRowLengths( rowLengths );
      EXPECT_EQ( rowLengths, rowCapacities );

      out = 0.0;
      m2.vectorProduct( in, out );
      //std::cerr << out << std::endl;
      for( IndexType i = 0; i < size; i++ )
         EXPECT_EQ( out.getElement( i ), ( i + 1 ) * ( i + 2 ) / 2 );
         
      }
   }
}