Loading CMakeLists.txt +12 −4 Original line number Diff line number Diff line Loading @@ -83,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set default build options set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode Loading @@ -109,6 +109,14 @@ add_compile_options( "$<$<CONFIG:RELEASE>:-march=native;-mtune=native>" ) if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) # disable some unimportant warnings set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-unknown-pragmas" ) elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) # Intel's -Wall is very minimalistic, so add -w3 and disable some specific warnings set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w3 -diag-disable:remark" ) endif() # disable GCC's infamous "maybe-uninitialized" warning (it produces mostly false positives) if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized" ) Loading @@ -135,9 +143,9 @@ endif() # force colorized output in continuous integration if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" ) message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.") if( CXX_COMPILER_NAME MATCHES "clang" ) if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics" ) else() elseif( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color" ) endif() endif() Loading build +90 −81 Original line number Diff line number Diff line #!/bin/bash # exit as soon as there is an error set -e TARGET=TNL PREFIX=${HOME}/.local INSTALL="no" ROOT_DIR="." DCMTK_DIR="/usr/include/dcmtk" # get the root directory (i.e. the directory where this script is located) ROOT_DIR="$( builtin cd -P "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" BUILD="" BUILD_JOBS="" CMAKE="cmake" CMAKE_ONLY="no" HELP="no" VERBOSE="" OFFLINE_BUILD="no" INSTALL="no" PREFIX=${HOME}/.local CMAKE="cmake" CMAKE_ONLY="no" COMPILER="gcc" DCMTK_DIR="/usr/include/dcmtk" WITH_CLANG="no" WITH_MPI="yes" WITH_CUDA="yes" WITH_CUDA_ARCH="auto" Loading @@ -34,21 +35,52 @@ WITH_TOOLS="yes" WITH_BENCHMARKS="yes" WITH_CI_FLAGS="no" for option in "$@" do case $option in --prefix=* ) PREFIX="${option#*=}" ;; --install=* ) INSTALL="${option#*=}" ;; --root-dir=* ) ROOT_DIR="${option#*=}" ;; --dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;; for option in "$@"; do if [[ "$option" == "--help" ]]; then echo "TNL build options:" echo "" echo " --help Write this help list and exit." echo " --build=Debug/Release Build type." echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default." echo " --verbose Enables verbose build." echo " --offline-build=yes/no Disables online updates during the build. '$OFFLINE_BUILD' by default." echo " --install=yes/no Enables the installation of TNL files. '$INSTALL' by default." echo " --prefix=PATH Prefix for the installation directory. '$HOME/local' by default." echo " --cmake=CMAKE Path to the cmake command. '$CMAKE' by default." echo " --cmake-only=yes/no Run only the cmake command, don't actually build anything. '$CMAKE_ONLY' by default." echo " --compiler=gcc/clang/icc Selects the compiler to use. '$COMPILER' by default." echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir. '$DCMTK_DIR' by default." echo " --with-mpi=yes/no Enables MPI. '$WITH_MPI' by default (OpenMPI required)." echo " --with-cuda=yes/no Enables CUDA. '$WITH_CUDA' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. '$WITH_CUDA_ARCH' by default." echo " --with-openmp=yes/no Enables OpenMP. '$WITH_OPENMP' by default." echo " --with-gmp=yes/no Enables the wrapper for GNU Multiple Precision Arithmetic Library. '$WITH_GMP' by default." echo " --with-tests=yes/no Enables compilation of unit tests. '$WITH_TESTS' by default." echo " --run-tests=yes/no Runs unit tests if they were compiled. '$RUN_TESTS' by default." echo " --tests-jobs=NUM Number of processes to be used for the unit tests. It is $TEST_JOBS by default." echo " --with-profiling=yes/no Enables code profiling compiler flags. '$WITH_PROFILING' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests (lcov is required). '$WITH_COVERAGE' by default." echo " --with-doc=yes/no Generate the documentation. '$WITH_DOC' by default." echo " --with-examples=yes/no Compile the 'src/Examples' directory. '$WITH_EXAMPLES' by default." echo " --with-python=yes/no Compile the Python bindings. '$WITH_PYTHON' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. '$WITH_TOOLS' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. '$WITH_BENCHMARKS' by default." exit 1 fi done for option in "$@"; do case "$option" in --build=* ) BUILD="${option#*=}" ;; --build-jobs=* ) BUILD_JOBS="${option#*=}" ;; --cmake=* ) CMAKE="${option#*=}" ;; --cmake-only=* ) CMAKE_ONLY="${option#*=}" ;; --verbose ) VERBOSE="VERBOSE=1" ;; --help ) HELP="yes" ;; --offline-build ) OFFLINE_BUILD="yes" ;; --with-clang=* ) WITH_CLANG="${option#*=}" ;; --install=* ) INSTALL="${option#*=}" ;; --prefix=* ) PREFIX="${option#*=}" ;; --cmake=* ) CMAKE="${option#*=}" ;; --cmake-only=* ) CMAKE_ONLY="${option#*=}" ;; --compiler=* ) COMPILER="${option#*=}" ;; --dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;; --with-mpi=* ) WITH_MPI="${option#*=}" ;; --with-cuda=* ) WITH_CUDA="${option#*=}" ;; --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";; Loading @@ -67,49 +99,26 @@ do --with-python=* ) WITH_PYTHON="${option#*=}" ;; --with-ci-flags=* ) WITH_CI_FLAGS="${option#*=}" ;; * ) echo "Unknown option ${option}. Use --help for more information." exit 1 ;; echo "Unknown option ${option}. Use --help for more information." >&2 exit 1 esac done if [[ ${HELP} == "yes" ]]; then echo "TNL build options:" echo "" echo " --build=Debug/Release Build type." echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default." echo " --prefix=PATH Prefix for the installation directory. ${HOME}/local by default." echo " --install=yes/no Enables the installation of TNL files." echo " --offline-build=yes/no Disables online updates during the build. 'no' by default." echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)." echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default." echo " --with-gmp=yes/no Enables the wrapper for GNU Multiple Precision Arithmetic Library. 'no' by default." echo " --with-tests=yes/no Enables compilation of unit tests. 'yes' by default." echo " --run-tests=yes/no Runs unit tests if they were compiled. 'yes' by default." echo " --tests-jobs=NUM Number of processes to be used for the unit tests. It is 4 by default." echo " --with-profiling=yes/no Enables code profiling compiler falgs. 'no' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-doc=yes/no Generate the documentation. 'yes' by default." echo " --with-examples=yes/no Compile the 'src/Examples' directory. 'yes' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." echo " --root-dir=PATH Path to the TNL source code root dir." echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir." echo " --help Write this help." exit 1 fi if [[ ${WITH_CLANG} == "yes" ]]; then if [[ "$COMPILER" == "gcc" ]]; then export CXX=g++ export CC=gcc export CUDA_HOST_COMPILER=g++ elif [[ "$COMPILER" == "clang" ]]; then export CXX=clang++ export CC=clang export CUDA_HOST_COMPILER=clang++ elif [[ "$COMPILER" == "icc" ]]; then export CXX=icpc export CC=icc export CUDA_HOST_COMPILER=icpc else export CXX=g++ export CC=gcc echo "Error: the compiler '$COMPILER' is not supported. The only options are 'gcc', 'clang' and 'icc'." >&2 exit 1 fi if [[ ! $(command -v cmake) ]]; then Loading Loading @@ -160,7 +169,7 @@ else last_cmake_command="" fi if [[ ! -f "$check_file" ]] || [[ "$last_cmake_command" != "${cmake_command[@]}" ]]; then echo "Configuring ${BUILD} $TARGET ..." echo "Configuring ${BUILD} TNL ..." "${cmake_command[@]}" echo -n "${cmake_command[@]}" > ".cmake_command" fi Loading Loading @@ -190,10 +199,10 @@ else fi if [[ -n ${BUILD_JOBS} ]]; then echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..." echo "Building ${BUILD} TNL using $BUILD_JOBS processors ..." else # number of processors is unknown - it is encoded in $MAKEFLAGS from parent environment echo "Building ${BUILD} $TARGET ..." echo "Building ${BUILD} TNL ..." fi if [[ "$INSTALL" == "yes" ]]; then Loading install +2 −2 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ if [[ ${BUILD_DEBUG} == "yes" ]]; then mkdir Debug fi pushd Debug if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}; then if ! ../build --build=Debug --install=yes ${OPTIONS}; then echo "Debug build failed." exit 1 fi Loading @@ -47,7 +47,7 @@ if [[ ${BUILD_RELEASE} == "yes" ]]; then mkdir Release fi pushd Release if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; then if ! ../build --build=Release --install=yes ${OPTIONS}; then echo "Release build failed." exit 1 fi Loading src/TNL/Algorithms/MemoryOperationsCuda.hpp +8 −8 Original line number Diff line number Diff line Loading @@ -56,19 +56,19 @@ getElement( const Element* data ) #ifdef __CUDA_ARCH__ return *data; #else Element result; #ifdef HAVE_CUDA cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost ); TNL_CHECK_CUDA_DEVICE; #else throw Exceptions::CudaSupportMissing(); #endif // TODO: For some reason the following does not work after adding // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement // Probably it might be a problem with lambda function 'kernel' which // nvcc probably does not handle properly. //MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 ); #ifdef HAVE_CUDA Element result; cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost ); TNL_CHECK_CUDA_DEVICE; return result; #else throw Exceptions::CudaSupportMissing(); #endif #endif } Loading src/TNL/Algorithms/Segments/BiEllpackView.hpp +16 −16 Original line number Diff line number Diff line Loading @@ -149,11 +149,6 @@ template< typename Device, __cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect( rowPermArray, groupPointers, segmentIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ Loading @@ -168,6 +163,11 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType segmentIdx ); #endif } else return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect( rowPermArray, groupPointers, segmentIdx ); } template< typename Device, Loading Loading @@ -197,12 +197,6 @@ template< typename Device, __cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect( rowPermArray, groupPointers, segmentIdx, localIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ Loading @@ -219,6 +213,12 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp localIdx ); #endif } else return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect( rowPermArray, groupPointers, segmentIdx, localIdx ); } template< typename Device, Loading @@ -230,11 +230,6 @@ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( std::is_same< DeviceType, Devices::Host >::value ) return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect( rowPermArray, groupPointers, segmentIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ Loading @@ -249,6 +244,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType segmentIdx ); #endif } else return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect( rowPermArray, groupPointers, segmentIdx ); } template< typename Device, Loading Loading
CMakeLists.txt +12 −4 Original line number Diff line number Diff line Loading @@ -83,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set default build options set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode Loading @@ -109,6 +109,14 @@ add_compile_options( "$<$<CONFIG:RELEASE>:-march=native;-mtune=native>" ) if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) # disable some unimportant warnings set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-unknown-pragmas" ) elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) # Intel's -Wall is very minimalistic, so add -w3 and disable some specific warnings set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w3 -diag-disable:remark" ) endif() # disable GCC's infamous "maybe-uninitialized" warning (it produces mostly false positives) if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized" ) Loading @@ -135,9 +143,9 @@ endif() # force colorized output in continuous integration if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" ) message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.") if( CXX_COMPILER_NAME MATCHES "clang" ) if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics" ) else() elseif( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color" ) endif() endif() Loading
build +90 −81 Original line number Diff line number Diff line #!/bin/bash # exit as soon as there is an error set -e TARGET=TNL PREFIX=${HOME}/.local INSTALL="no" ROOT_DIR="." DCMTK_DIR="/usr/include/dcmtk" # get the root directory (i.e. the directory where this script is located) ROOT_DIR="$( builtin cd -P "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" BUILD="" BUILD_JOBS="" CMAKE="cmake" CMAKE_ONLY="no" HELP="no" VERBOSE="" OFFLINE_BUILD="no" INSTALL="no" PREFIX=${HOME}/.local CMAKE="cmake" CMAKE_ONLY="no" COMPILER="gcc" DCMTK_DIR="/usr/include/dcmtk" WITH_CLANG="no" WITH_MPI="yes" WITH_CUDA="yes" WITH_CUDA_ARCH="auto" Loading @@ -34,21 +35,52 @@ WITH_TOOLS="yes" WITH_BENCHMARKS="yes" WITH_CI_FLAGS="no" for option in "$@" do case $option in --prefix=* ) PREFIX="${option#*=}" ;; --install=* ) INSTALL="${option#*=}" ;; --root-dir=* ) ROOT_DIR="${option#*=}" ;; --dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;; for option in "$@"; do if [[ "$option" == "--help" ]]; then echo "TNL build options:" echo "" echo " --help Write this help list and exit." echo " --build=Debug/Release Build type." echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default." echo " --verbose Enables verbose build." echo " --offline-build=yes/no Disables online updates during the build. '$OFFLINE_BUILD' by default." echo " --install=yes/no Enables the installation of TNL files. '$INSTALL' by default." echo " --prefix=PATH Prefix for the installation directory. '$HOME/local' by default." echo " --cmake=CMAKE Path to the cmake command. '$CMAKE' by default." echo " --cmake-only=yes/no Run only the cmake command, don't actually build anything. '$CMAKE_ONLY' by default." echo " --compiler=gcc/clang/icc Selects the compiler to use. '$COMPILER' by default." echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir. '$DCMTK_DIR' by default." echo " --with-mpi=yes/no Enables MPI. '$WITH_MPI' by default (OpenMPI required)." echo " --with-cuda=yes/no Enables CUDA. '$WITH_CUDA' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. '$WITH_CUDA_ARCH' by default." echo " --with-openmp=yes/no Enables OpenMP. '$WITH_OPENMP' by default." echo " --with-gmp=yes/no Enables the wrapper for GNU Multiple Precision Arithmetic Library. '$WITH_GMP' by default." echo " --with-tests=yes/no Enables compilation of unit tests. '$WITH_TESTS' by default." echo " --run-tests=yes/no Runs unit tests if they were compiled. '$RUN_TESTS' by default." echo " --tests-jobs=NUM Number of processes to be used for the unit tests. It is $TEST_JOBS by default." echo " --with-profiling=yes/no Enables code profiling compiler flags. '$WITH_PROFILING' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests (lcov is required). '$WITH_COVERAGE' by default." echo " --with-doc=yes/no Generate the documentation. '$WITH_DOC' by default." echo " --with-examples=yes/no Compile the 'src/Examples' directory. '$WITH_EXAMPLES' by default." echo " --with-python=yes/no Compile the Python bindings. '$WITH_PYTHON' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. '$WITH_TOOLS' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. '$WITH_BENCHMARKS' by default." exit 1 fi done for option in "$@"; do case "$option" in --build=* ) BUILD="${option#*=}" ;; --build-jobs=* ) BUILD_JOBS="${option#*=}" ;; --cmake=* ) CMAKE="${option#*=}" ;; --cmake-only=* ) CMAKE_ONLY="${option#*=}" ;; --verbose ) VERBOSE="VERBOSE=1" ;; --help ) HELP="yes" ;; --offline-build ) OFFLINE_BUILD="yes" ;; --with-clang=* ) WITH_CLANG="${option#*=}" ;; --install=* ) INSTALL="${option#*=}" ;; --prefix=* ) PREFIX="${option#*=}" ;; --cmake=* ) CMAKE="${option#*=}" ;; --cmake-only=* ) CMAKE_ONLY="${option#*=}" ;; --compiler=* ) COMPILER="${option#*=}" ;; --dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;; --with-mpi=* ) WITH_MPI="${option#*=}" ;; --with-cuda=* ) WITH_CUDA="${option#*=}" ;; --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";; Loading @@ -67,49 +99,26 @@ do --with-python=* ) WITH_PYTHON="${option#*=}" ;; --with-ci-flags=* ) WITH_CI_FLAGS="${option#*=}" ;; * ) echo "Unknown option ${option}. Use --help for more information." exit 1 ;; echo "Unknown option ${option}. Use --help for more information." >&2 exit 1 esac done if [[ ${HELP} == "yes" ]]; then echo "TNL build options:" echo "" echo " --build=Debug/Release Build type." echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default." echo " --prefix=PATH Prefix for the installation directory. ${HOME}/local by default." echo " --install=yes/no Enables the installation of TNL files." echo " --offline-build=yes/no Disables online updates during the build. 'no' by default." echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)." echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default." echo " --with-gmp=yes/no Enables the wrapper for GNU Multiple Precision Arithmetic Library. 'no' by default." echo " --with-tests=yes/no Enables compilation of unit tests. 'yes' by default." echo " --run-tests=yes/no Runs unit tests if they were compiled. 'yes' by default." echo " --tests-jobs=NUM Number of processes to be used for the unit tests. It is 4 by default." echo " --with-profiling=yes/no Enables code profiling compiler falgs. 'no' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-doc=yes/no Generate the documentation. 'yes' by default." echo " --with-examples=yes/no Compile the 'src/Examples' directory. 'yes' by default." echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." echo " --root-dir=PATH Path to the TNL source code root dir." echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir." echo " --help Write this help." exit 1 fi if [[ ${WITH_CLANG} == "yes" ]]; then if [[ "$COMPILER" == "gcc" ]]; then export CXX=g++ export CC=gcc export CUDA_HOST_COMPILER=g++ elif [[ "$COMPILER" == "clang" ]]; then export CXX=clang++ export CC=clang export CUDA_HOST_COMPILER=clang++ elif [[ "$COMPILER" == "icc" ]]; then export CXX=icpc export CC=icc export CUDA_HOST_COMPILER=icpc else export CXX=g++ export CC=gcc echo "Error: the compiler '$COMPILER' is not supported. The only options are 'gcc', 'clang' and 'icc'." >&2 exit 1 fi if [[ ! $(command -v cmake) ]]; then Loading Loading @@ -160,7 +169,7 @@ else last_cmake_command="" fi if [[ ! -f "$check_file" ]] || [[ "$last_cmake_command" != "${cmake_command[@]}" ]]; then echo "Configuring ${BUILD} $TARGET ..." echo "Configuring ${BUILD} TNL ..." "${cmake_command[@]}" echo -n "${cmake_command[@]}" > ".cmake_command" fi Loading Loading @@ -190,10 +199,10 @@ else fi if [[ -n ${BUILD_JOBS} ]]; then echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..." echo "Building ${BUILD} TNL using $BUILD_JOBS processors ..." else # number of processors is unknown - it is encoded in $MAKEFLAGS from parent environment echo "Building ${BUILD} $TARGET ..." echo "Building ${BUILD} TNL ..." fi if [[ "$INSTALL" == "yes" ]]; then Loading
install +2 −2 Original line number Diff line number Diff line Loading @@ -35,7 +35,7 @@ if [[ ${BUILD_DEBUG} == "yes" ]]; then mkdir Debug fi pushd Debug if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}; then if ! ../build --build=Debug --install=yes ${OPTIONS}; then echo "Debug build failed." exit 1 fi Loading @@ -47,7 +47,7 @@ if [[ ${BUILD_RELEASE} == "yes" ]]; then mkdir Release fi pushd Release if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; then if ! ../build --build=Release --install=yes ${OPTIONS}; then echo "Release build failed." exit 1 fi Loading
src/TNL/Algorithms/MemoryOperationsCuda.hpp +8 −8 Original line number Diff line number Diff line Loading @@ -56,19 +56,19 @@ getElement( const Element* data ) #ifdef __CUDA_ARCH__ return *data; #else Element result; #ifdef HAVE_CUDA cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost ); TNL_CHECK_CUDA_DEVICE; #else throw Exceptions::CudaSupportMissing(); #endif // TODO: For some reason the following does not work after adding // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement // Probably it might be a problem with lambda function 'kernel' which // nvcc probably does not handle properly. //MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 ); #ifdef HAVE_CUDA Element result; cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost ); TNL_CHECK_CUDA_DEVICE; return result; #else throw Exceptions::CudaSupportMissing(); #endif #endif } Loading
src/TNL/Algorithms/Segments/BiEllpackView.hpp +16 −16 Original line number Diff line number Diff line Loading @@ -149,11 +149,6 @@ template< typename Device, __cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect( rowPermArray, groupPointers, segmentIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ Loading @@ -168,6 +163,11 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType segmentIdx ); #endif } else return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect( rowPermArray, groupPointers, segmentIdx ); } template< typename Device, Loading Loading @@ -197,12 +197,6 @@ template< typename Device, __cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect( rowPermArray, groupPointers, segmentIdx, localIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ Loading @@ -219,6 +213,12 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp localIdx ); #endif } else return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect( rowPermArray, groupPointers, segmentIdx, localIdx ); } template< typename Device, Loading @@ -230,11 +230,6 @@ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( std::is_same< DeviceType, Devices::Host >::value ) return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect( rowPermArray, groupPointers, segmentIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ Loading @@ -249,6 +244,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType segmentIdx ); #endif } else return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect( rowPermArray, groupPointers, segmentIdx ); } template< typename Device, Loading