Loading .gitlab-ci.yml +2 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ stages: WITH_MPI: "no" WITH_TESTS: "yes" WITH_COVERAGE: "no" WITH_DOC: "no" # these are built only in the "full" config WITH_BENCHMARKS: "no" WITH_EXAMPLES: "no" Loading Loading @@ -55,6 +56,7 @@ stages: -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} -DWITH_MIC=${WITH_MIC} -DWITH_TESTS=${WITH_TESTS} -DWITH_DOC=${WITH_DOC} -DWITH_COVERAGE=${WITH_COVERAGE} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} -DWITH_EXAMPLES=${WITH_EXAMPLES} Loading CMakeLists.txt +13 −6 Original line number Diff line number Diff line Loading @@ -7,7 +7,9 @@ # ############################################################################### cmake_minimum_required( VERSION 3.5.1 ) # cmake 3.12.2 is required due to compatibility with CUDA 10 # (see the issue reported here: https://github.com/clab/dynet/issues/1457 ) cmake_minimum_required( VERSION 3.12.2 ) project( tnl ) Loading Loading @@ -62,8 +64,8 @@ endif() # check if the compiler is good enough if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # GCC 5.0 is the first release with full C++11 support (due to libstdc++) # https://gcc.gnu.org/gcc-5/changes.html # GCC 5.0 is the first release with full C++11 support (due to libstdc++) as # well as full C++14 support: https://gcc.gnu.org/gcc-5/changes.html if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") message(FATAL_ERROR "Insufficient GCC version") endif() Loading @@ -74,8 +76,13 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() endif() # set C++ standard set( CMAKE_CXX_STANDARD 14 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) Loading Loading @@ -147,7 +154,7 @@ endif() # Check for CUDA # if( ${WITH_CUDA} ) find_package( CUDA ) find_package( CUDA 9.0 ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) Loading Loading @@ -186,7 +193,7 @@ if( ${WITH_CUDA} ) # disable false compiler warnings # reference for the -Xcudafe --diag_suppress and --display_error_number flags: https://stackoverflow.com/a/54142937 # incomplete list of tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "\"--diag_suppress=code_is_unreachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=2906 --display_error_number\"") set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "\"--diag_suppress=code_is_unreachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --diag_suppress=2906 --diag_suppress=2913 --display_error_number\"") set( ALL_CUDA_ARCHS -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_32,code=sm_32 Loading Documentation/Doxyfile +2 −1 Original line number Diff line number Diff line Loading @@ -2107,7 +2107,8 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. PREDEFINED = PREDEFINED = HAVE_MPI=1 HAVE_CUDA=1 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The Loading src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +4 −6 Original line number Diff line number Diff line Loading @@ -163,9 +163,8 @@ struct SpmvBenchmark { MatrixType matrix; VectorType vector; if( ! matrix.load( parameters.getParameter< String >( "input-matrix" ) ) || ! vector.load( parameters.getParameter< String >( "input-vector" ) ) ) return false; matrix.load( parameters.getParameter< String >( "input-matrix" ) ); File( parameters.getParameter< String >( "input-vector" ), std::ios_base::in ) >> vector; typename MatrixType::CompressedRowLengthsVector rowLengths; matrix.getCompressedRowLengths( rowLengths ); Loading Loading @@ -267,8 +266,7 @@ struct SpmvBenchmark distributedMatrix.vectorProduct( distributedVector, distributedY ); const int rank = CommunicatorType::GetRank( distributedMatrix.getCommunicationGroup() ); const int nproc = CommunicatorType::GetSize( distributedMatrix.getCommunicationGroup() ); VectorType subY( y, Partitioner::getOffset( matrix.getRows(), rank, nproc ), typename VectorType::ViewType subY( &y[ Partitioner::getOffset( matrix.getRows(), rank, nproc ) ], Partitioner::getSizeForRank( matrix.getRows(), rank, nproc ) ); TNL_ASSERT_EQ( distributedY.getLocalVectorView(), subY, "WRONG RESULT !!!" ); #endif Loading src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +9 −4 Original line number Diff line number Diff line Loading @@ -144,7 +144,11 @@ setInitialCondition( const Config::ParameterContainer& parameters, { const String& initialConditionFile = parameters.getParameter< String >( "initial-condition" ); Functions::MeshFunction< Mesh > u( this->getMesh(), dofsPointer ); if( ! u.boundLoad( initialConditionFile ) ) try { u.boundLoad( initialConditionFile ); } catch(...) { std::cerr << "I am not able to load the initial condition from the file " << initialConditionFile << "." << std::endl; return false; Loading Loading @@ -200,8 +204,7 @@ makeSnapshot( const RealType& time, fileName.setIndex( step ); //FileNameBaseNumberEnding( "u-", step, 5, ".tnl", fileName ); if( ! u.save( fileName.getFileName() ) ) return false; u.save( fileName.getFileName() ); return true; } Loading Loading @@ -595,11 +598,13 @@ getExplicitUpdate( const RealType& time, userData.real_u = uDofs->getData(); userData.real_fu = fuDofs->getData(); #endif /* const IndexType gridXSize = mesh->getDimensions().x(); const IndexType gridYSize = mesh->getDimensions().y(); dim3 cudaBlockSize( 16, 16 ); dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), gridYSize / 16 + ( gridYSize % 16 != 0 ) ); */ TNL::Devices::Cuda::synchronizeDevice(); int cudaErr; Loading Loading
.gitlab-ci.yml +2 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ stages: WITH_MPI: "no" WITH_TESTS: "yes" WITH_COVERAGE: "no" WITH_DOC: "no" # these are built only in the "full" config WITH_BENCHMARKS: "no" WITH_EXAMPLES: "no" Loading Loading @@ -55,6 +56,7 @@ stages: -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} -DWITH_MIC=${WITH_MIC} -DWITH_TESTS=${WITH_TESTS} -DWITH_DOC=${WITH_DOC} -DWITH_COVERAGE=${WITH_COVERAGE} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} -DWITH_EXAMPLES=${WITH_EXAMPLES} Loading
CMakeLists.txt +13 −6 Original line number Diff line number Diff line Loading @@ -7,7 +7,9 @@ # ############################################################################### cmake_minimum_required( VERSION 3.5.1 ) # cmake 3.12.2 is required due to compatibility with CUDA 10 # (see the issue reported here: https://github.com/clab/dynet/issues/1457 ) cmake_minimum_required( VERSION 3.12.2 ) project( tnl ) Loading Loading @@ -62,8 +64,8 @@ endif() # check if the compiler is good enough if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # GCC 5.0 is the first release with full C++11 support (due to libstdc++) # https://gcc.gnu.org/gcc-5/changes.html # GCC 5.0 is the first release with full C++11 support (due to libstdc++) as # well as full C++14 support: https://gcc.gnu.org/gcc-5/changes.html if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") message(FATAL_ERROR "Insufficient GCC version") endif() Loading @@ -74,8 +76,13 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() endif() # set C++ standard set( CMAKE_CXX_STANDARD 14 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) Loading Loading @@ -147,7 +154,7 @@ endif() # Check for CUDA # if( ${WITH_CUDA} ) find_package( CUDA ) find_package( CUDA 9.0 ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) Loading Loading @@ -186,7 +193,7 @@ if( ${WITH_CUDA} ) # disable false compiler warnings # reference for the -Xcudafe --diag_suppress and --display_error_number flags: https://stackoverflow.com/a/54142937 # incomplete list of tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "\"--diag_suppress=code_is_unreachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=2906 --display_error_number\"") set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Wno-deprecated-gpu-targets --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "\"--diag_suppress=code_is_unreachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --diag_suppress=2906 --diag_suppress=2913 --display_error_number\"") set( ALL_CUDA_ARCHS -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_32,code=sm_32 Loading
Documentation/Doxyfile +2 −1 Original line number Diff line number Diff line Loading @@ -2107,7 +2107,8 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. PREDEFINED = PREDEFINED = HAVE_MPI=1 HAVE_CUDA=1 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The Loading
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +4 −6 Original line number Diff line number Diff line Loading @@ -163,9 +163,8 @@ struct SpmvBenchmark { MatrixType matrix; VectorType vector; if( ! matrix.load( parameters.getParameter< String >( "input-matrix" ) ) || ! vector.load( parameters.getParameter< String >( "input-vector" ) ) ) return false; matrix.load( parameters.getParameter< String >( "input-matrix" ) ); File( parameters.getParameter< String >( "input-vector" ), std::ios_base::in ) >> vector; typename MatrixType::CompressedRowLengthsVector rowLengths; matrix.getCompressedRowLengths( rowLengths ); Loading Loading @@ -267,8 +266,7 @@ struct SpmvBenchmark distributedMatrix.vectorProduct( distributedVector, distributedY ); const int rank = CommunicatorType::GetRank( distributedMatrix.getCommunicationGroup() ); const int nproc = CommunicatorType::GetSize( distributedMatrix.getCommunicationGroup() ); VectorType subY( y, Partitioner::getOffset( matrix.getRows(), rank, nproc ), typename VectorType::ViewType subY( &y[ Partitioner::getOffset( matrix.getRows(), rank, nproc ) ], Partitioner::getSizeForRank( matrix.getRows(), rank, nproc ) ); TNL_ASSERT_EQ( distributedY.getLocalVectorView(), subY, "WRONG RESULT !!!" ); #endif Loading
src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +9 −4 Original line number Diff line number Diff line Loading @@ -144,7 +144,11 @@ setInitialCondition( const Config::ParameterContainer& parameters, { const String& initialConditionFile = parameters.getParameter< String >( "initial-condition" ); Functions::MeshFunction< Mesh > u( this->getMesh(), dofsPointer ); if( ! u.boundLoad( initialConditionFile ) ) try { u.boundLoad( initialConditionFile ); } catch(...) { std::cerr << "I am not able to load the initial condition from the file " << initialConditionFile << "." << std::endl; return false; Loading Loading @@ -200,8 +204,7 @@ makeSnapshot( const RealType& time, fileName.setIndex( step ); //FileNameBaseNumberEnding( "u-", step, 5, ".tnl", fileName ); if( ! u.save( fileName.getFileName() ) ) return false; u.save( fileName.getFileName() ); return true; } Loading Loading @@ -595,11 +598,13 @@ getExplicitUpdate( const RealType& time, userData.real_u = uDofs->getData(); userData.real_fu = fuDofs->getData(); #endif /* const IndexType gridXSize = mesh->getDimensions().x(); const IndexType gridYSize = mesh->getDimensions().y(); dim3 cudaBlockSize( 16, 16 ); dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), gridYSize / 16 + ( gridYSize % 16 != 0 ) ); */ TNL::Devices::Cuda::synchronizeDevice(); int cudaErr; Loading