Loading .gitlab-ci.yml +2 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ stages: WITH_MPI: "no" WITH_TESTS: "yes" WITH_COVERAGE: "no" WITH_DOC: "no" # these are built only in the "full" config WITH_BENCHMARKS: "no" WITH_EXAMPLES: "no" Loading Loading @@ -55,6 +56,7 @@ stages: -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} -DWITH_MIC=${WITH_MIC} -DWITH_TESTS=${WITH_TESTS} -DWITH_DOC=${WITH_DOC} -DWITH_COVERAGE=${WITH_COVERAGE} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} -DWITH_EXAMPLES=${WITH_EXAMPLES} Loading CMakeLists.txt +12 −5 Original line number Diff line number Diff line Loading @@ -7,7 +7,9 @@ # ############################################################################### cmake_minimum_required( VERSION 3.5.1 ) # cmake 3.12.2 is required due to compatibility with CUDA 10 # (see the issue reported here: https://github.com/clab/dynet/issues/1457 ) cmake_minimum_required( VERSION 3.12.2 ) project( tnl ) Loading Loading @@ -62,8 +64,8 @@ endif() # check if the compiler is good enough if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # GCC 5.0 is the first release with full C++11 support (due to libstdc++) # https://gcc.gnu.org/gcc-5/changes.html # GCC 5.0 is the first release with full C++11 support (due to libstdc++) as # well as full C++14 support: https://gcc.gnu.org/gcc-5/changes.html if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") message(FATAL_ERROR "Insufficient GCC version") endif() Loading @@ -74,8 +76,13 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() endif() # set C++ standard set( CMAKE_CXX_STANDARD 14 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) Loading Loading @@ -147,7 +154,7 @@ endif() # Check for CUDA # if( ${WITH_CUDA} ) find_package( CUDA ) find_package( CUDA 9.0 ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) Loading src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +3 −4 Original line number Diff line number Diff line Loading @@ -164,7 +164,7 @@ struct SpmvBenchmark MatrixType matrix; VectorType vector; matrix.load( parameters.getParameter< String >( "input-matrix" ) ); vector.load( parameters.getParameter< String >( "input-vector" ) ); File( parameters.getParameter< String >( "input-vector" ), std::ios_base::in ) >> vector; typename MatrixType::CompressedRowLengthsVector rowLengths; matrix.getCompressedRowLengths( rowLengths ); Loading Loading @@ -266,8 +266,7 @@ struct SpmvBenchmark distributedMatrix.vectorProduct( distributedVector, distributedY ); const int rank = CommunicatorType::GetRank( distributedMatrix.getCommunicationGroup() ); const int nproc = CommunicatorType::GetSize( distributedMatrix.getCommunicationGroup() ); VectorType subY( y, Partitioner::getOffset( matrix.getRows(), rank, nproc ), typename VectorType::ViewType subY( &y[ Partitioner::getOffset( matrix.getRows(), rank, nproc ) ], Partitioner::getSizeForRank( matrix.getRows(), rank, nproc ) ); TNL_ASSERT_EQ( distributedY.getLocalVectorView(), subY, "WRONG RESULT !!!" ); #endif Loading src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +3 −1 Original line number Diff line number Diff line Loading @@ -598,11 +598,13 @@ getExplicitUpdate( const RealType& time, userData.real_u = uDofs->getData(); userData.real_fu = fuDofs->getData(); #endif /* const IndexType gridXSize = mesh->getDimensions().x(); const IndexType gridYSize = mesh->getDimensions().y(); dim3 cudaBlockSize( 16, 16 ); dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), gridYSize / 16 + ( gridYSize % 16 != 0 ) ); */ TNL::Devices::Cuda::synchronizeDevice(); int cudaErr; Loading src/Benchmarks/HeatEquation/Tuning/ExplicitUpdater.h +3 −1 Original line number Diff line number Diff line Loading @@ -187,7 +187,8 @@ class ExplicitUpdater typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter; ( *userData.fu )( entity ) = ( *userData.differentialOperator )( *userData.u, entity, userData.time ); + FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time ); // TODO: fix the right hand side here !!! // + FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time ); } Loading @@ -200,6 +201,7 @@ class ExplicitUpdater typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter; userData.real_fu[ entityIndex ] = ( *userData.differentialOperator )( mesh, userData.real_u, entityIndex, coordinates, userData.time ); // TODO: fix the right hand side here !!! // + 0.0; } Loading Loading
.gitlab-ci.yml +2 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ stages: WITH_MPI: "no" WITH_TESTS: "yes" WITH_COVERAGE: "no" WITH_DOC: "no" # these are built only in the "full" config WITH_BENCHMARKS: "no" WITH_EXAMPLES: "no" Loading Loading @@ -55,6 +56,7 @@ stages: -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} -DWITH_MIC=${WITH_MIC} -DWITH_TESTS=${WITH_TESTS} -DWITH_DOC=${WITH_DOC} -DWITH_COVERAGE=${WITH_COVERAGE} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} -DWITH_EXAMPLES=${WITH_EXAMPLES} Loading
CMakeLists.txt +12 −5 Original line number Diff line number Diff line Loading @@ -7,7 +7,9 @@ # ############################################################################### cmake_minimum_required( VERSION 3.5.1 ) # cmake 3.12.2 is required due to compatibility with CUDA 10 # (see the issue reported here: https://github.com/clab/dynet/issues/1457 ) cmake_minimum_required( VERSION 3.12.2 ) project( tnl ) Loading Loading @@ -62,8 +64,8 @@ endif() # check if the compiler is good enough if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # GCC 5.0 is the first release with full C++11 support (due to libstdc++) # https://gcc.gnu.org/gcc-5/changes.html # GCC 5.0 is the first release with full C++11 support (due to libstdc++) as # well as full C++14 support: https://gcc.gnu.org/gcc-5/changes.html if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") message(FATAL_ERROR "Insufficient GCC version") endif() Loading @@ -74,8 +76,13 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() endif() # set C++ standard set( CMAKE_CXX_STANDARD 14 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) Loading Loading @@ -147,7 +154,7 @@ endif() # Check for CUDA # if( ${WITH_CUDA} ) find_package( CUDA ) find_package( CUDA 9.0 ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) Loading
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +3 −4 Original line number Diff line number Diff line Loading @@ -164,7 +164,7 @@ struct SpmvBenchmark MatrixType matrix; VectorType vector; matrix.load( parameters.getParameter< String >( "input-matrix" ) ); vector.load( parameters.getParameter< String >( "input-vector" ) ); File( parameters.getParameter< String >( "input-vector" ), std::ios_base::in ) >> vector; typename MatrixType::CompressedRowLengthsVector rowLengths; matrix.getCompressedRowLengths( rowLengths ); Loading Loading @@ -266,8 +266,7 @@ struct SpmvBenchmark distributedMatrix.vectorProduct( distributedVector, distributedY ); const int rank = CommunicatorType::GetRank( distributedMatrix.getCommunicationGroup() ); const int nproc = CommunicatorType::GetSize( distributedMatrix.getCommunicationGroup() ); VectorType subY( y, Partitioner::getOffset( matrix.getRows(), rank, nproc ), typename VectorType::ViewType subY( &y[ Partitioner::getOffset( matrix.getRows(), rank, nproc ) ], Partitioner::getSizeForRank( matrix.getRows(), rank, nproc ) ); TNL_ASSERT_EQ( distributedY.getLocalVectorView(), subY, "WRONG RESULT !!!" ); #endif Loading
src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +3 −1 Original line number Diff line number Diff line Loading @@ -598,11 +598,13 @@ getExplicitUpdate( const RealType& time, userData.real_u = uDofs->getData(); userData.real_fu = fuDofs->getData(); #endif /* const IndexType gridXSize = mesh->getDimensions().x(); const IndexType gridYSize = mesh->getDimensions().y(); dim3 cudaBlockSize( 16, 16 ); dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), gridYSize / 16 + ( gridYSize % 16 != 0 ) ); */ TNL::Devices::Cuda::synchronizeDevice(); int cudaErr; Loading
src/Benchmarks/HeatEquation/Tuning/ExplicitUpdater.h +3 −1 Original line number Diff line number Diff line Loading @@ -187,7 +187,8 @@ class ExplicitUpdater typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter; ( *userData.fu )( entity ) = ( *userData.differentialOperator )( *userData.u, entity, userData.time ); + FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time ); // TODO: fix the right hand side here !!! // + FunctionAdapter::getValue( *userData.rightHandSide, entity, userData.time ); } Loading @@ -200,6 +201,7 @@ class ExplicitUpdater typedef Functions::FunctionAdapter< MeshType, RightHandSide > FunctionAdapter; userData.real_fu[ entityIndex ] = ( *userData.differentialOperator )( mesh, userData.real_u, entityIndex, coordinates, userData.time ); // TODO: fix the right hand side here !!! // + 0.0; } Loading