Commit 1fdc65ec authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Merge branch 'JK/icc' into 'develop'

Support compilation with ICC

See merge request !67
parents 5c7f5331 aaccf135
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -83,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
set( CMAKE_CXX_EXTENSIONS OFF )

# set default build options
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall" )
set( CMAKE_CXX_FLAGS_DEBUG "-g" )
set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" )
# pass -rdynamic only in Debug mode
@@ -109,6 +109,14 @@ add_compile_options(
   "$<$<CONFIG:RELEASE>:-march=native;-mtune=native>"
)

# Compiler-specific warning flags, appended to the global CMAKE_CXX_FLAGS.
if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
   # ICC: -Wall alone reports very little, so raise the warning level with -w3
   # and switch off the diagnostics classified as remarks
   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w3 -diag-disable:remark" )
elseif( CMAKE_CXX_COMPILER_ID MATCHES "^(GNU|Clang)$" )
   # GCC and Clang: silence a couple of unimportant warnings
   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-unknown-pragmas" )
endif()

# disable GCC's infamous "maybe-uninitialized" warning (it produces mostly false positives)
if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized" )
@@ -135,9 +143,9 @@ endif()
# force colorized output in continuous integration
if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
   message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
   if( CXX_COMPILER_NAME MATCHES "clang" )
   if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
      set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics" )
   else()
   elseif( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
      set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color" )
   endif()
endif()
+90 −81
Original line number Diff line number Diff line
#!/bin/bash

# exit as soon as there is an error
set -e

TARGET=TNL
PREFIX=${HOME}/.local
INSTALL="no"
ROOT_DIR="."
DCMTK_DIR="/usr/include/dcmtk"
# get the root directory (i.e. the directory where this script is located)
ROOT_DIR="$( builtin cd -P "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

BUILD=""
BUILD_JOBS=""
CMAKE="cmake"
CMAKE_ONLY="no"
HELP="no"
VERBOSE=""
OFFLINE_BUILD="no"
INSTALL="no"
PREFIX=${HOME}/.local
CMAKE="cmake"
CMAKE_ONLY="no"
COMPILER="gcc"
DCMTK_DIR="/usr/include/dcmtk"

WITH_CLANG="no"
WITH_MPI="yes"
WITH_CUDA="yes"
WITH_CUDA_ARCH="auto"
@@ -34,21 +35,52 @@ WITH_TOOLS="yes"
WITH_BENCHMARKS="yes"
WITH_CI_FLAGS="no"

for option in "$@"
do
    case $option in
        --prefix=*                       ) PREFIX="${option#*=}" ;;
        --install=*                      ) INSTALL="${option#*=}" ;;
        --root-dir=*                     ) ROOT_DIR="${option#*=}" ;;
        --dcmtk-dir=*                    ) DCMTK_DIR="${option#*=}" ;;
# Print the usage summary and exit as soon as --help appears anywhere on the
# command line. This is a separate pass that runs before the actual option
# parsing, so --help takes effect regardless of its position among the other
# options. The defaults shown in the text are interpolated from the variables
# holding the default values.
for option in "$@"; do
   if [[ "$option" == "--help" ]]; then
      echo "TNL build options:"
      echo ""
      echo "   --help                                Write this help list and exit."
      echo "   --build=Debug/Release                 Build type."
      echo "   --build-jobs=NUM                      Number of processes to be used for the build. It is set to the number of available CPU cores by default."
      echo "   --verbose                             Enables verbose build."
      # the option parser accepts only the bare flag (no "=yes/no" value)
      echo "   --offline-build                       Disables online updates during the build. Offline build is '$OFFLINE_BUILD' by default."
      echo "   --install=yes/no                      Enables the installation of TNL files. '$INSTALL' by default."
      # show the real default prefix instead of a hard-coded (and wrong) path
      echo "   --prefix=PATH                         Prefix for the installation directory. '$PREFIX' by default."
      echo "   --cmake=CMAKE                         Path to the cmake command. '$CMAKE' by default."
      echo "   --cmake-only=yes/no                   Run only the cmake command, don't actually build anything. '$CMAKE_ONLY' by default."
      echo "   --compiler=gcc/clang/icc              Selects the compiler to use. '$COMPILER' by default."
      echo "   --dcmtk-dir=PATH                      Path to the DCMTK (Dicom Toolkit) root dir. '$DCMTK_DIR' by default."
      echo "   --with-mpi=yes/no                     Enables MPI. '$WITH_MPI' by default (OpenMPI required)."
      echo "   --with-cuda=yes/no                    Enables CUDA. '$WITH_CUDA' by default (CUDA Toolkit is required)."
      echo "   --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. '$WITH_CUDA_ARCH' by default."
      echo "   --with-openmp=yes/no                  Enables OpenMP. '$WITH_OPENMP' by default."
      echo "   --with-gmp=yes/no                     Enables the wrapper for GNU Multiple Precision Arithmetic Library. '$WITH_GMP' by default."
      echo "   --with-tests=yes/no                   Enables compilation of unit tests. '$WITH_TESTS' by default."
      echo "   --run-tests=yes/no                    Runs unit tests if they were compiled. '$RUN_TESTS' by default."
      echo "   --tests-jobs=NUM                      Number of processes to be used for the unit tests. It is $TEST_JOBS by default."
      echo "   --with-profiling=yes/no               Enables code profiling compiler flags. '$WITH_PROFILING' by default."
      echo "   --with-coverage=yes/no                Enables code coverage reports for unit tests (lcov is required). '$WITH_COVERAGE' by default."
      echo "   --with-doc=yes/no                     Generate the documentation. '$WITH_DOC' by default."
      echo "   --with-examples=yes/no                Compile the 'src/Examples' directory. '$WITH_EXAMPLES' by default."
      echo "   --with-python=yes/no                  Compile the Python bindings. '$WITH_PYTHON' by default."
      echo "   --with-tools=yes/no                   Compile the 'src/Tools' directory. '$WITH_TOOLS' by default."
      echo "   --with-benchmarks=yes/no              Compile the 'src/Benchmarks' directory. '$WITH_BENCHMARKS' by default."
      # NOTE(review): help exits with status 1; 0 would be more conventional --
      # confirm that no caller relies on the non-zero status before changing it
      exit 1
   fi
done

for option in "$@"; do
   case "$option" in
      --build=*                        ) BUILD="${option#*=}" ;;
      --build-jobs=*                   ) BUILD_JOBS="${option#*=}" ;;
        --cmake=*                        ) CMAKE="${option#*=}" ;;
        --cmake-only=*                   ) CMAKE_ONLY="${option#*=}" ;;
      --verbose                        ) VERBOSE="VERBOSE=1" ;;
        --help                           ) HELP="yes" ;;
      --offline-build                  ) OFFLINE_BUILD="yes" ;;
        --with-clang=*                   ) WITH_CLANG="${option#*=}" ;;
      --install=*                      ) INSTALL="${option#*=}" ;;
      --prefix=*                       ) PREFIX="${option#*=}" ;;
      --cmake=*                        ) CMAKE="${option#*=}" ;;
      --cmake-only=*                   ) CMAKE_ONLY="${option#*=}" ;;
      --compiler=*                     ) COMPILER="${option#*=}" ;;
      --dcmtk-dir=*                    ) DCMTK_DIR="${option#*=}" ;;
      --with-mpi=*                     ) WITH_MPI="${option#*=}" ;;
      --with-cuda=*                    ) WITH_CUDA="${option#*=}" ;;
      --with-cuda-arch=*               ) WITH_CUDA_ARCH="${option#*=}";;
@@ -67,49 +99,26 @@ do
      --with-python=*                  ) WITH_PYTHON="${option#*=}" ;;
      --with-ci-flags=*                ) WITH_CI_FLAGS="${option#*=}" ;;
      *                                )
           echo "Unknown option ${option}. Use --help for more information."
           exit 1 ;;
         echo "Unknown option ${option}. Use --help for more information." >&2
         exit 1
   esac
done

if [[ ${HELP} == "yes" ]]; then
    echo "TNL build options:"
    echo ""
    echo "   --build=Debug/Release                 Build type."
    echo "   --build-jobs=NUM                      Number of processes to be used for the build. It is set to the number of available CPU cores by default."
    echo "   --prefix=PATH                         Prefix for the installation directory. ${HOME}/local by default."
    echo "   --install=yes/no                      Enables the installation of TNL files."
    echo "   --offline-build=yes/no                Disables online updates during the build. 'no' by default."
    echo "   --with-mpi=yes/no                     Enables MPI. 'yes' by default (OpenMPI required)."
    echo "   --with-cuda=yes/no                    Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
    echo "   --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default."
    echo "   --with-openmp=yes/no                  Enables OpenMP. 'yes' by default."
    echo "   --with-gmp=yes/no                     Enables the wrapper for GNU Multiple Precision Arithmetic Library. 'no' by default."
    echo "   --with-tests=yes/no                   Enables compilation of unit tests. 'yes' by default."
    echo "   --run-tests=yes/no                    Runs unit tests if they were compiled. 'yes' by default."
    echo "   --tests-jobs=NUM                      Number of processes to be used for the unit tests. It is 4 by default."
    echo "   --with-profiling=yes/no               Enables code profiling compiler falgs. 'no' by default."
    echo "   --with-coverage=yes/no                Enables code coverage reports for unit tests. 'no' by default (lcov is required)."
    echo "   --with-doc=yes/no                     Generate the documentation. 'yes' by default."
    echo "   --with-examples=yes/no                Compile the 'src/Examples' directory. 'yes' by default."
    echo "   --with-tools=yes/no                   Compile the 'src/Tools' directory. 'yes' by default."
    echo "   --with-python=yes/no                  Compile the Python bindings. 'yes' by default."
    echo "   --with-benchmarks=yes/no              Compile the 'src/Benchmarks' directory. 'yes' by default."
    echo "   --cmake=CMAKE                         Path to cmake. 'cmake' by default."
    echo "   --verbose                             It enables verbose build."
    echo "   --root-dir=PATH                       Path to the TNL source code root dir."
    echo "   --dcmtk-dir=PATH                      Path to the DCMTK (Dicom Toolkit) root dir."
    echo "   --help                                Write this help."
    exit 1
fi

if [[ ${WITH_CLANG} == "yes" ]]; then
if [[ "$COMPILER" == "gcc" ]]; then
   export CXX=g++
   export CC=gcc
   export CUDA_HOST_COMPILER=g++
elif [[ "$COMPILER" == "clang" ]]; then
   export CXX=clang++
   export CC=clang
   export CUDA_HOST_COMPILER=clang++
elif [[ "$COMPILER" == "icc" ]]; then
   export CXX=icpc
   export CC=icc
   export CUDA_HOST_COMPILER=icpc
else
   export CXX=g++
   export CC=gcc
   echo "Error: the compiler '$COMPILER' is not supported. The only options are 'gcc', 'clang' and 'icc'." >&2
   exit 1
fi

if [[ ! $(command -v cmake) ]]; then
@@ -160,7 +169,7 @@ else
   last_cmake_command=""
fi
if [[ ! -f "$check_file" ]] || [[ "$last_cmake_command" != "${cmake_command[@]}" ]]; then
   echo "Configuring ${BUILD} $TARGET ..."
   echo "Configuring ${BUILD} TNL ..."
   "${cmake_command[@]}"
   echo -n "${cmake_command[@]}" > ".cmake_command"
fi
@@ -190,10 +199,10 @@ else
fi

if [[ -n ${BUILD_JOBS} ]]; then
   echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..."
   echo "Building ${BUILD} TNL using $BUILD_JOBS processors ..."
else
   # number of processors is unknown - it is encoded in $MAKEFLAGS from parent environment
   echo "Building ${BUILD} $TARGET ..."
   echo "Building ${BUILD} TNL ..."
fi

if [[ "$INSTALL" == "yes" ]]; then
+2 −2
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ if [[ ${BUILD_DEBUG} == "yes" ]]; then
      mkdir Debug
   fi
   pushd Debug
   if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}; then
   if ! ../build --build=Debug --install=yes ${OPTIONS}; then
      echo "Debug build failed."
      exit 1
   fi
@@ -47,7 +47,7 @@ if [[ ${BUILD_RELEASE} == "yes" ]]; then
      mkdir Release
   fi
   pushd Release
   if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; then
   if ! ../build --build=Release --install=yes ${OPTIONS}; then
      echo "Release build failed."
      exit 1
   fi
+8 −8
Original line number Diff line number Diff line
@@ -56,19 +56,19 @@ getElement( const Element* data )
#ifdef __CUDA_ARCH__
   return *data;
#else
   Element result;
#ifdef HAVE_CUDA
   cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
   TNL_CHECK_CUDA_DEVICE;
#else
   throw Exceptions::CudaSupportMissing();
#endif
   // TODO: For some reason the following does not work after adding
   // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement.
   // It might be a problem with the lambda function 'kernel', which
   // nvcc probably does not handle properly.
   //MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
   #ifdef HAVE_CUDA
      Element result;
      cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
      TNL_CHECK_CUDA_DEVICE;
      return result;
   #else
      throw Exceptions::CudaSupportMissing();
   #endif
#endif
}

+16 −16
Original line number Diff line number Diff line
@@ -149,11 +149,6 @@ template< typename Device,
__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >::
getSegmentSize( const IndexType segmentIdx ) const -> IndexType
{
   if( std::is_same< DeviceType, Devices::Host >::value )
      return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect(
         rowPermArray,
         groupPointers,
         segmentIdx );
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef __CUDA_ARCH__
@@ -168,6 +163,11 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType
         segmentIdx );
#endif
   }
   else
      return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect(
         rowPermArray,
         groupPointers,
         segmentIdx );
}

template< typename Device,
@@ -197,12 +197,6 @@ template< typename Device,
__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >::
getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType
{
   if( std::is_same< DeviceType, Devices::Host >::value )
      return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect(
         rowPermArray,
         groupPointers,
         segmentIdx,
         localIdx );
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef __CUDA_ARCH__
@@ -219,6 +213,12 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
         localIdx );
#endif
   }
   else
      return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect(
         rowPermArray,
         groupPointers,
         segmentIdx,
         localIdx );
}

template< typename Device,
@@ -230,11 +230,6 @@ auto
BiEllpackView< Device, Index, Organization, WarpSize >::
getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
{
   if( std::is_same< DeviceType, Devices::Host >::value )
      return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect(
         rowPermArray,
         groupPointers,
         segmentIdx );
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef __CUDA_ARCH__
@@ -249,6 +244,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
         segmentIdx );
#endif
   }
   else
      return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect(
         rowPermArray,
         groupPointers,
         segmentIdx );
}

template< typename Device,
Loading