Merge branch 'JK/icc' into 'develop' (1fdc65ec) · Commits · TNL / tnl-dev

CMakeLists.txt

+12 −4

Original line number	Diff line number	Diff line
		@@ -83,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
		set( CMAKE_CXX_EXTENSIONS OFF )

		# set default build options
		set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" )
		set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall" )
		set( CMAKE_CXX_FLAGS_DEBUG "-g" )
		set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" )
		# pass -rdynamic only in Debug mode
		@@ -109,6 +109,14 @@ add_compile_options(
		"$<$<CONFIG:RELEASE>:-march=native;-mtune=native>"
		)

		# Compiler-specific adjustments of the warning flags, selected by the
		# compiler ID that CMake detected for the C++ compiler.
		if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
		    # GCC and Clang: silence two warnings that are considered unimportant
		    string( APPEND CMAKE_CXX_FLAGS " -Wno-unused-local-typedefs -Wno-unknown-pragmas" )
		elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
		    # Intel: -Wall alone reports very little, so raise the level with -w3
		    # and switch off the diagnostics classified as remarks
		    string( APPEND CMAKE_CXX_FLAGS " -w3 -diag-disable:remark" )
		endif()

		# disable GCC's infamous "maybe-uninitialized" warning (it produces mostly false positives)
		if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
		set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized" )
		@@ -135,9 +143,9 @@ endif()
		# force colorized output in continuous integration
		if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
		message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
		if( CXX_COMPILER_NAME MATCHES "clang" )
		if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
		set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics" )
		else()
		elseif( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
		set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color" )
		endif()
		endif()

build

+90 −81

Original line number	Diff line number	Diff line
		#!/bin/bash

		# exit as soon as there is an error
		set -e

		TARGET=TNL
		PREFIX=${HOME}/.local
		INSTALL="no"
		ROOT_DIR="."
		DCMTK_DIR="/usr/include/dcmtk"
		# get the root directory (i.e. the directory where this script is located)
		ROOT_DIR="$( builtin cd -P "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

		BUILD=""
		BUILD_JOBS=""
		CMAKE="cmake"
		CMAKE_ONLY="no"
		HELP="no"
		VERBOSE=""
		OFFLINE_BUILD="no"
		INSTALL="no"
		PREFIX=${HOME}/.local
		CMAKE="cmake"
		CMAKE_ONLY="no"
		COMPILER="gcc"
		DCMTK_DIR="/usr/include/dcmtk"

		WITH_CLANG="no"
		WITH_MPI="yes"
		WITH_CUDA="yes"
		WITH_CUDA_ARCH="auto"
		@@ -34,21 +35,52 @@ WITH_TOOLS="yes"
		WITH_BENCHMARKS="yes"
		WITH_CI_FLAGS="no"

		for option in "$@"
		do
		case $option in
		--prefix=* ) PREFIX="${option#*=}" ;;
		--install=* ) INSTALL="${option#*=}" ;;
		--root-dir=* ) ROOT_DIR="${option#*=}" ;;
		--dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;;
		# Scan all arguments for --help before any other option is parsed. When it
		# is present, print the list of supported options and stop. The defaults
		# shown in quotes are expanded from the script's option variables, so the
		# help text follows the values actually in effect.
		# NOTE(review): the option parser matches a bare `--offline-build`, while
		# the help advertises `--offline-build=yes/no` -- confirm which is intended.
		for option in "$@"; do
		    if [[ "$option" == "--help" ]]; then
		        echo "TNL build options:"
		        echo ""
		        echo " --help Write this help list and exit."
		        echo " --build=Debug/Release Build type."
		        echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default."
		        echo " --verbose Enables verbose build."
		        echo " --offline-build=yes/no Disables online updates during the build. '$OFFLINE_BUILD' by default."
		        echo " --install=yes/no Enables the installation of TNL files. '$INSTALL' by default."
		        # print the real default held in PREFIX instead of a hard-coded path
		        echo " --prefix=PATH Prefix for the installation directory. '$PREFIX' by default."
		        echo " --cmake=CMAKE Path to the cmake command. '$CMAKE' by default."
		        echo " --cmake-only=yes/no Run only the cmake command, don't actually build anything. '$CMAKE_ONLY' by default."
		        echo " --compiler=gcc/clang/icc Selects the compiler to use. '$COMPILER' by default."
		        echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir. '$DCMTK_DIR' by default."
		        echo " --with-mpi=yes/no Enables MPI. '$WITH_MPI' by default (OpenMPI required)."
		        echo " --with-cuda=yes/no Enables CUDA. '$WITH_CUDA' by default (CUDA Toolkit is required)."
		        echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. '$WITH_CUDA_ARCH' by default."
		        echo " --with-openmp=yes/no Enables OpenMP. '$WITH_OPENMP' by default."
		        echo " --with-gmp=yes/no Enables the wrapper for GNU Multiple Precision Arithmetic Library. '$WITH_GMP' by default."
		        echo " --with-tests=yes/no Enables compilation of unit tests. '$WITH_TESTS' by default."
		        echo " --run-tests=yes/no Runs unit tests if they were compiled. '$RUN_TESTS' by default."
		        echo " --tests-jobs=NUM Number of processes to be used for the unit tests. It is $TEST_JOBS by default."
		        echo " --with-profiling=yes/no Enables code profiling compiler flags. '$WITH_PROFILING' by default."
		        echo " --with-coverage=yes/no Enables code coverage reports for unit tests (lcov is required). '$WITH_COVERAGE' by default."
		        echo " --with-doc=yes/no Generate the documentation. '$WITH_DOC' by default."
		        echo " --with-examples=yes/no Compile the 'src/Examples' directory. '$WITH_EXAMPLES' by default."
		        echo " --with-python=yes/no Compile the Python bindings. '$WITH_PYTHON' by default."
		        echo " --with-tools=yes/no Compile the 'src/Tools' directory. '$WITH_TOOLS' by default."
		        echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. '$WITH_BENCHMARKS' by default."
		        # the script stops here with status 1; nothing is configured or built
		        exit 1
		    fi
		done

		for option in "$@"; do
		case "$option" in
		--build=* ) BUILD="${option#*=}" ;;
		--build-jobs=* ) BUILD_JOBS="${option#*=}" ;;
		--cmake=* ) CMAKE="${option#*=}" ;;
		--cmake-only=* ) CMAKE_ONLY="${option#*=}" ;;
		--verbose ) VERBOSE="VERBOSE=1" ;;
		--help ) HELP="yes" ;;
		--offline-build ) OFFLINE_BUILD="yes" ;;
		--with-clang=* ) WITH_CLANG="${option#*=}" ;;
		--install=* ) INSTALL="${option#*=}" ;;
		--prefix=* ) PREFIX="${option#*=}" ;;
		--cmake=* ) CMAKE="${option#*=}" ;;
		--cmake-only=* ) CMAKE_ONLY="${option#*=}" ;;
		--compiler=* ) COMPILER="${option#*=}" ;;
		--dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;;
		--with-mpi=* ) WITH_MPI="${option#*=}" ;;
		--with-cuda=* ) WITH_CUDA="${option#*=}" ;;
		--with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";;
		@@ -67,49 +99,26 @@ do
		--with-python=* ) WITH_PYTHON="${option#*=}" ;;
		--with-ci-flags=* ) WITH_CI_FLAGS="${option#*=}" ;;
		* )
		echo "Unknown option ${option}. Use --help for more information."
		exit 1 ;;
		echo "Unknown option ${option}. Use --help for more information." >&2
		exit 1
		esac
		done

		if [[ ${HELP} == "yes" ]]; then
		echo "TNL build options:"
		echo ""
		echo " --build=Debug/Release Build type."
		echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default."
		echo " --prefix=PATH Prefix for the installation directory. ${HOME}/local by default."
		echo " --install=yes/no Enables the installation of TNL files."
		echo " --offline-build=yes/no Disables online updates during the build. 'no' by default."
		echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)."
		echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
		echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default."
		echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default."
		echo " --with-gmp=yes/no Enables the wrapper for GNU Multiple Precision Arithmetic Library. 'no' by default."
		echo " --with-tests=yes/no Enables compilation of unit tests. 'yes' by default."
		echo " --run-tests=yes/no Runs unit tests if they were compiled. 'yes' by default."
		echo " --tests-jobs=NUM Number of processes to be used for the unit tests. It is 4 by default."
		echo " --with-profiling=yes/no Enables code profiling compiler falgs. 'no' by default."
		echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)."
		echo " --with-doc=yes/no Generate the documentation. 'yes' by default."
		echo " --with-examples=yes/no Compile the 'src/Examples' directory. 'yes' by default."
		echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default."
		echo " --with-python=yes/no Compile the Python bindings. 'yes' by default."
		echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default."
		echo " --cmake=CMAKE Path to cmake. 'cmake' by default."
		echo " --verbose It enables verbose build."
		echo " --root-dir=PATH Path to the TNL source code root dir."
		echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir."
		echo " --help Write this help."
		exit 1
		fi

		if [[ ${WITH_CLANG} == "yes" ]]; then
		if [[ "$COMPILER" == "gcc" ]]; then
		export CXX=g++
		export CC=gcc
		export CUDA_HOST_COMPILER=g++
		elif [[ "$COMPILER" == "clang" ]]; then
		export CXX=clang++
		export CC=clang
		export CUDA_HOST_COMPILER=clang++
		elif [[ "$COMPILER" == "icc" ]]; then
		export CXX=icpc
		export CC=icc
		export CUDA_HOST_COMPILER=icpc
		else
		export CXX=g++
		export CC=gcc
		echo "Error: the compiler '$COMPILER' is not supported. The only options are 'gcc', 'clang' and 'icc'." >&2
		exit 1
		fi

		if [[ ! $(command -v cmake) ]]; then
		@@ -160,7 +169,7 @@ else
		last_cmake_command=""
		fi
		if [[ ! -f "$check_file" ]] \|\| [[ "$last_cmake_command" != "${cmake_command[@]}" ]]; then
		echo "Configuring ${BUILD} $TARGET ..."
		echo "Configuring ${BUILD} TNL ..."
		"${cmake_command[@]}"
		echo -n "${cmake_command[@]}" > ".cmake_command"
		fi
		@@ -190,10 +199,10 @@ else
		fi

		if [[ -n ${BUILD_JOBS} ]]; then
		echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..."
		echo "Building ${BUILD} TNL using $BUILD_JOBS processors ..."
		else
		# number of processors is unknown - it is encoded in $MAKEFLAGS from parent environment
		echo "Building ${BUILD} $TARGET ..."
		echo "Building ${BUILD} TNL ..."
		fi

		if [[ "$INSTALL" == "yes" ]]; then

install

+2 −2

Original line number	Diff line number	Diff line
		@@ -35,7 +35,7 @@ if [[ ${BUILD_DEBUG} == "yes" ]]; then
		mkdir Debug
		fi
		pushd Debug
		if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}; then
		if ! ../build --build=Debug --install=yes ${OPTIONS}; then
		echo "Debug build failed."
		exit 1
		fi
		@@ -47,7 +47,7 @@ if [[ ${BUILD_RELEASE} == "yes" ]]; then
		mkdir Release
		fi
		pushd Release
		if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; then
		if ! ../build --build=Release --install=yes ${OPTIONS}; then
		echo "Release build failed."
		exit 1
		fi

src/TNL/Algorithms/MemoryOperationsCuda.hpp

+8 −8

Original line number	Diff line number	Diff line
		@@ -56,19 +56,19 @@ getElement( const Element* data )
		#ifdef __CUDA_ARCH__
		return *data;
		#else
		Element result;
		#ifdef HAVE_CUDA
		cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
		TNL_CHECK_CUDA_DEVICE;
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		// TODO: For some reason the following does not work after adding
		// #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement
		// Probably it might be a problem with lambda function 'kernel' which
		// nvcc probably does not handle properly.
		//MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
		#ifdef HAVE_CUDA
		Element result;
		cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
		TNL_CHECK_CUDA_DEVICE;
		return result;
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		#endif
		}

src/TNL/Algorithms/Segments/BiEllpackView.hpp

+16 −16

Original line number	Diff line number	Diff line
		@@ -149,11 +149,6 @@ template< typename Device,
		__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >::
		getSegmentSize( const IndexType segmentIdx ) const -> IndexType
		{
		if( std::is_same< DeviceType, Devices::Host >::value )
		return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect(
		rowPermArray,
		groupPointers,
		segmentIdx );
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef __CUDA_ARCH__
		@@ -168,6 +163,11 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType
		segmentIdx );
		#endif
		}
		else
		return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect(
		rowPermArray,
		groupPointers,
		segmentIdx );
		}

		template< typename Device,
		@@ -197,12 +197,6 @@ template< typename Device,
		__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >::
		getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType
		{
		if( std::is_same< DeviceType, Devices::Host >::value )
		return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect(
		rowPermArray,
		groupPointers,
		segmentIdx,
		localIdx );
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef __CUDA_ARCH__
		@@ -219,6 +213,12 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
		localIdx );
		#endif
		}
		else
		return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect(
		rowPermArray,
		groupPointers,
		segmentIdx,
		localIdx );
		}

		template< typename Device,
		@@ -230,11 +230,6 @@ auto
		BiEllpackView< Device, Index, Organization, WarpSize >::
		getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
		{
		if( std::is_same< DeviceType, Devices::Host >::value )
		return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect(
		rowPermArray,
		groupPointers,
		segmentIdx );
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef __CUDA_ARCH__
		@@ -249,6 +244,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
		segmentIdx );
		#endif
		}
		else
		return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect(
		rowPermArray,
		groupPointers,
		segmentIdx );
		}

		template< typename Device,