From 18aeeef8988d03f3c3ae24c83f664dce17110226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 24 Dec 2020 18:40:01 +0100 Subject: [PATCH 1/4] CMakeLists.txt: suppress another useless nvcc warning from CUDA 11.2 --- CMakeLists.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 874d39f6a..2a59a4117 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -215,6 +215,10 @@ if( ${WITH_CUDA} ) # reference for the -Xcudafe --diag_suppress and --display_error_number flags: https://stackoverflow.com/a/54142937 # incomplete list of tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --diag_suppress=2906 -Xcudafe --diag_suppress=2913 -Xcudafe --diag_suppress=2886 -Xcudafe --diag_suppress=2929 -Xcudafe --diag_suppress=2977 -Xcudafe --diag_suppress=3057 -Xcudafe --diag_suppress=3124 -Xcudafe --display_error_number) + if(CUDA_VERSION_STRING VERSION_GREATER_EQUAL "11.2") + # this diag number would cause an error on older nvcc + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=20012) + endif() # temporarily disable host-compler warnings about VLAs, which are caused by nvcc's modifications to the source code set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcompiler -Wno-vla) # Select GPU architecture @@ -263,7 +267,7 @@ if( JPEG_FOUND ) endif() #### -# Test for GMP +# Test for GMP # if( ${WITH_GMP} ) if (GMP_INCLUDES AND GMP_LIBRARIES) @@ -301,7 +305,7 @@ endif() # DOC "PETSC headers." # ) # if( ${PETSC_INCLUDE_DIR} STREQUAL "PETSC_INCLUDE_DIR-NOTFOUND" ) -# message( "PETSC not found." 
) +# message( "PETSC not found." ) # else() # message( "PETSC headers found -- ${PETSC_INCLUDE_DIR}" ) # FIND_LIBRARY(PETSC_LIBRARY petsc @@ -313,7 +317,7 @@ endif() # #set( PETSC_LIBRARY "${MPI_LIBRARIES} ${PETSC_LIBRARY}") # message( "PETSC library found -- ${PETSC_LIBRARY}") # list( GET MPI_CXX_INCLUDE_PATH 0 MPI_CXX_PATH ) -# set(PETSC_CXX_FLAGS "-DHAVE_PETSC -I${PETSC_INCLUDE_DIR} -DHAVE_MPI -I${MPI_CXX_PATH}") +# set(PETSC_CXX_FLAGS "-DHAVE_PETSC -I${PETSC_INCLUDE_DIR} -DHAVE_MPI -I${MPI_CXX_PATH}") # endif() # endif() #endif() -- GitLab From 64e6732390041e01f00a583385436ef9acb61d1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Fri, 25 Dec 2020 01:28:54 +0100 Subject: [PATCH 2/4] Removed useless assert from DenseMatrixView The condition is always satisfied, since getAllocatedElementsCount returns the size of the values vector view, which was bound just prior to the assert. --- src/TNL/Matrices/DenseMatrixView.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index c76e0c1f6..d7a781e20 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -40,13 +40,6 @@ DenseMatrixView( const IndexType rows, const ValuesViewType& values ) : MatrixView< Real, Device, Index >( rows, columns, values ) { -#ifdef __CUDA_ARCH__ - TNL_ASSERT_EQ( values.getSize(), this->getAllocatedElementsCount(), "Number of matrix elements does not agree with matrix dimensions." ); -#else - if( values.getSize() != this->getAllocatedElementsCount() ) - throw( std::logic_error( "Number of matrix elements does not agree with matrix dimensions." 
) ); -#endif - SegmentsType a( rows, columns ); segments = a.getView(); } -- GitLab From 0a5550bb29e16af1daf2bad0b8f7fcd7462b7118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 26 Dec 2020 19:11:17 +0100 Subject: [PATCH 3/4] CMakeLists.txt: cleaned up nvcc flags --- CMakeLists.txt | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a59a4117..05a0fd0b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,15 +210,13 @@ if( ${WITH_CUDA} ) set( CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} ) endif() endif() - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ;-DHAVE_CUDA) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -DHAVE_CUDA --expt-relaxed-constexpr --expt-extended-lambda) # disable false compiler warnings # reference for the -Xcudafe --diag_suppress and --display_error_number flags: https://stackoverflow.com/a/54142937 # incomplete list of tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe --diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --diag_suppress=2906 -Xcudafe --diag_suppress=2913 -Xcudafe --diag_suppress=2886 -Xcudafe --diag_suppress=2929 -Xcudafe --diag_suppress=2977 -Xcudafe --diag_suppress=3057 -Xcudafe --diag_suppress=3124 -Xcudafe --display_error_number) - if(CUDA_VERSION_STRING VERSION_GREATER_EQUAL "11.2") - # this diag number would cause an error on older nvcc - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=20012) - endif() + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=code_is_unreachable -Xcudafe --diag_suppress=loop_not_reachable -Xcudafe 
--diag_suppress=implicit_return_from_non_void_function -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --display_error_number) + # This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470 + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored) # temporarily disable host-compler warnings about VLAs, which are caused by nvcc's modifications to the source code set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcompiler -Wno-vla) # Select GPU architecture -- GitLab From 6bf315c3c4d91eb80ecfa8ca525b0833a89c3522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 26 Dec 2020 12:04:55 +0100 Subject: [PATCH 4/4] Improved test of vectorProduct in the SparseMatrixTest --- src/UnitTests/Matrices/SparseMatrixTest.hpp | 92 +++++++++++---------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 41d5025a0..7c0d831a8 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -1310,51 +1310,55 @@ void test_VectorProduct() ///// // Large test - if( ( std::is_same< IndexType, int >::value || std::is_same< IndexType, long int >::value ) && - std::is_same< RealType, double >::value ) + const IndexType size( 35 ); + //for( int size = 1; size < 1000; size++ ) { - const IndexType size( 35 ); - //for( int size = 1; size < 1000; size++ ) - { - //std::cerr << " size = " << size << std::endl; - // Test with large diagonal matrix - Matrix m1( size, size ); - TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowCapacities( size ); - rowCapacities.evaluate( [] __cuda_callable__ ( IndexType i ) { return 1; } ); - m1.setRowCapacities( rowCapacities ); - auto f1 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) { - if( localIdx == 0 ) - { - 
value = row + 1; - column = row; - } - }; - m1.forAllRows( f1 ); - TNL::Containers::Vector< double, DeviceType, IndexType > in( size, 1.0 ), out( size, 0.0 ); - m1.vectorProduct( in, out ); - //std::cerr << out << std::endl; - for( IndexType i = 0; i < size; i++ ) - EXPECT_EQ( out.getElement( i ), i + 1 ); - - // Test with large triangular matrix - Matrix m2( size, size ); - rowCapacities.evaluate( [] __cuda_callable__ ( IndexType i ) { return i + 1; } ); - m2.setRowCapacities( rowCapacities ); - auto f2 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) { - if( localIdx <= row ) - { - value = row -localIdx + 1; - column = localIdx; - } - }; - m2.forAllRows( f2 ); - out = 0.0; - m2.vectorProduct( in, out ); - //std::cerr << out << std::endl; - for( IndexType i = 0; i < size; i++ ) - EXPECT_EQ( out.getElement( i ), ( i + 1 ) * ( i + 2 ) / 2 ); - - } + //std::cerr << " size = " << size << std::endl; + // Test with large diagonal matrix + Matrix m1( size, size ); + TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowCapacities( size ); + rowCapacities.evaluate( [] __cuda_callable__ ( IndexType i ) { return 1; } ); + m1.setRowCapacities( rowCapacities ); + auto f1 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) { + if( localIdx == 0 ) + { + value = row + 1; + column = row; + } + }; + m1.forAllRows( f1 ); + // check that the matrix was initialized + m1.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, 1 ); + + TNL::Containers::Vector< double, DeviceType, IndexType > in( size, 1.0 ), out( size, 0.0 ); + m1.vectorProduct( in, out ); + //std::cerr << out << std::endl; + for( IndexType i = 0; i < size; i++ ) + EXPECT_EQ( out.getElement( i ), i + 1 ); + + // Test with large triangular matrix + Matrix m2( size, size ); + rowCapacities.evaluate( [] __cuda_callable__ ( IndexType i ) { return i + 1; } ); + 
m2.setRowCapacities( rowCapacities ); + auto f2 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) { + if( localIdx <= row ) + { + value = row -localIdx + 1; + column = localIdx; + } + }; + m2.forAllRows( f2 ); + // check that the matrix was initialized + TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowLengths( size ); + m2.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowLengths, rowCapacities ); + + out = 0.0; + m2.vectorProduct( in, out ); + //std::cerr << out << std::endl; + for( IndexType i = 0; i < size; i++ ) + EXPECT_EQ( out.getElement( i ), ( i + 1 ) * ( i + 2 ) / 2 ); } } -- GitLab