Merge branch 'cuda-11.2' into 'develop' (54c1655a) · Commits · TNL / tnl-dev

CMakeLists.txt

+7 −5

Original line number	Diff line number	Diff line
		@@ -210,11 +210,13 @@ if( ${WITH_CUDA} )
		set( CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} )
		endif()
		endif()
		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ;-DHAVE_CUDA)
		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -DHAVE_CUDA --expt-relaxed-constexpr --expt-extended-lambda)
		# disable false compiler warnings
		# reference for the -Xcudafe --diag_suppress and --display_error_number flags: https://stackoverflow.com/a/54142937
		# incomplete list of tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg
		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --diag_suppress=2906 -Xcudafe --diag_suppress=2913 -Xcudafe --diag_suppress=2886 -Xcudafe --diag_suppress=2929 -Xcudafe --diag_suppress=2977 -Xcudafe --diag_suppress=3057 -Xcudafe --diag_suppress=3124 -Xcudafe --display_error_number)
		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number)
		# This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored)
		# temporarily disable host-compiler warnings about VLAs, which are caused by nvcc's modifications to the source code
		set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcompiler -Wno-vla)
		# Select GPU architecture

src/TNL/Matrices/DenseMatrixView.hpp

+0 −7

Original line number	Diff line number	Diff line
		@@ -40,13 +40,6 @@ DenseMatrixView( const IndexType rows,
		const ValuesViewType& values )
		: MatrixView< Real, Device, Index >( rows, columns, values )
		{
		#ifdef __CUDA_ARCH__
		TNL_ASSERT_EQ( values.getSize(), this->getAllocatedElementsCount(), "Number of matrix elements does not agree with matrix dimensions." );
		#else
		if( values.getSize() != this->getAllocatedElementsCount() )
		throw( std::logic_error( "Number of matrix elements does not agree with matrix dimensions." ) );
		#endif

		SegmentsType a( rows, columns );
		segments = a.getView();
		}

src/UnitTests/Matrices/SparseMatrixTest.hpp

+48 −44

Original line number	Diff line number	Diff line
		@@ -1310,9 +1310,6 @@ void test_VectorProduct()

		/////
		// Large test
		if( ( std::is_same< IndexType, int >::value || std::is_same< IndexType, long int >::value ) &&
		std::is_same< RealType, double >::value )
		{
		const IndexType size( 35 );
		//for( int size = 1; size < 1000; size++ )
		{
		@@ -1330,6 +1327,10 @@ void test_VectorProduct()
		}
		};
		m1.forAllRows( f1 );
		// check that the matrix was initialized
		m1.getCompressedRowLengths( rowCapacities );
		EXPECT_EQ( rowCapacities, 1 );

		TNL::Containers::Vector< double, DeviceType, IndexType > in( size, 1.0 ), out( size, 0.0 );
		m1.vectorProduct( in, out );
		//std::cerr << out << std::endl;
		@@ -1348,13 +1349,16 @@ void test_VectorProduct()
		}
		};
		m2.forAllRows( f2 );
		// check that the matrix was initialized
		TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowLengths( size );
		m2.getCompressedRowLengths( rowLengths );
		EXPECT_EQ( rowLengths, rowCapacities );

		out = 0.0;
		m2.vectorProduct( in, out );
		//std::cerr << out << std::endl;
		for( IndexType i = 0; i < size; i++ )
		EXPECT_EQ( out.getElement( i ), ( i + 1 ) * ( i + 2 ) / 2 );

		}
		}
		}