Commit 070e2096 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Updated Makefile based on tnl-mhfem

parent 60ce7e64
Loading
Loading
Loading
Loading
+2 −9
Original line number Diff line number Diff line
@@ -19,8 +19,8 @@ host: tnl-benchmark-mesh
tnl-benchmark-mesh: tnl-benchmark-mesh.o $(MESH_BENCHMARK_TEMPLATES_CPP:%.cpp=%.o)
	$(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)
cuda: tnl-benchmark-mesh-cuda
tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cu.o $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cu.o)
	$(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS)
tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cuo $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cuo)
	$(CXX) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS)

clean: clean_templates
.PHONY: clean_templates
@@ -29,11 +29,4 @@ clean_templates:
	$(RM) -r MeshBenchmarks.templates/

-include $(SOURCES:%.cpp=%.d)

ifeq ($(CUDA_COMPILER),nvcc)
# nvcc creates .cu.d with rubbish and .d with the content we need
-include $(CUDA_SOURCES:%.cu=%.d)
else
# clang creates .cu.d
-include $(CUDA_SOURCES:%.cu=%.cu.d)
endif
+134 −105
Original line number Diff line number Diff line
@@ -3,18 +3,6 @@
#
# vim: ft=make

# variables configurable from command line (e.g. make TNL_CXX=clang++ )
# or from environment (e.g. TNL_CXX=clang++ make )
# Note that we have to use TNL_CXX instead of CXX to set the default value,
# because CXX is an implicit variable defined by GNU make itself. (Thank you!)
TNL_CXX ?= g++
CXX := $(TNL_CXX)
CUDA_COMPILER ?= nvcc
CUDA_HOST_COMPILER ?= $(CXX)
# BUILD can be 'Release' or 'Debug'
BUILD ?= Release


# relative paths used in the messages
mkfile_path := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
current_dir := ./$(subst $(mkfile_path),,$(CURDIR)/)
@@ -23,6 +11,14 @@ current_dir := $(current_dir:./%/=%)
# include colors
include $(mkfile_path)/Makefile.colors

# Load the build configuration: first the global config.mk located next to
# this Makefile, then an optional per-directory config.mk found in the
# directory where make was started.
include $(mkfile_path)/config.mk
# When current_dir is "./", the local file would be the global config.mk
# itself, so the second inclusion is skipped to avoid reading it twice.
ifneq ($(current_dir),./)
# $(wildcard) expands to nothing when the file does not exist
ifneq ($(wildcard $(CURDIR)/config.mk),)
include $(CURDIR)/config.mk
endif
endif

# make does not have an OR statement...
ifneq ($(filter $(BUILD),Release Debug),)
    $(info $(shell printf "$(COLOR_BOLD)### Building directory $(current_dir) in $(BUILD) mode$(COLOR_ALL_OFF)" ))
@@ -31,14 +27,17 @@ $(error Wrong BUILD: $(BUILD))
endif


## essential host options
CPPFLAGS = -MD -MP
CXXFLAGS = -std=c++14 -Wall -Wno-unused-local-typedefs -Wno-unused-variable  -Wno-deprecated -Wno-deprecated-declarations
LDFLAGS = -pthread
LDLIBS = -lstdc++fs
ifneq ($(CXX),clang++)
## essential host compiler flags
CPPFLAGS := -MD -MP
CXXFLAGS := -std=c++14 -Wall -Wno-unused-local-typedefs
ifeq ($(CXX),g++)
    CXXFLAGS += -Wno-maybe-uninitialized
endif
## essential linker flags
LDFLAGS := -pthread
CUDA_LDFLAGS := $(LDFLAGS)
LDLIBS := -lstdc++fs
CUDA_LDLIBS := $(LDLIBS)

ifeq ($(BUILD),Release)
    CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG
@@ -47,6 +46,9 @@ CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG
    ifeq ($(CXX),clang++)
        CXXFLAGS += -flto=thin
        LDFLAGS += -O3 -march=native -mtune=native -flto=thin
    else ifeq ($(CXX),g++)
        CXXFLAGS += -flto
        LDFLAGS += -O3 -march=native -mtune=native -flto
    endif
else ifeq ($(BUILD),Debug)
    CXXFLAGS += -Og -g
@@ -60,21 +62,26 @@ LDFLAGS += -fuse-ld=lld
endif


## add flags for linking CUDA with the host compiler
CUDA_LDFLAGS += -L $(CUDA_PATH)/lib64/
ifeq ($(STATIC_LINKING),True)
    CUDA_LDLIBS += -lcudart_static
else
    CUDA_LDLIBS += -lcudart
endif
CUDA_LDLIBS += -ldl -lrt

## options for nvcc
# automatic dependency generation for nvcc (gcc has it automated in the pre-processor phase with the -MD flag)
# note that $@ is expanded only when $(CUDA_CPPFLAGS) is used
CUDA_CPPFLAGS = --compiler-options -MD,-MP,-MT$@
CUDA_CXXFLAGS = -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER)
CUDA_LDLIBS = -lstdc++fs
CUDA_CPPFLAGS := -MD -MP
CUDA_CXXFLAGS := -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER)
# enable nvcc features
CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda --default-stream per-thread
# disable false compiler warnings
#   reference for the -Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910
#   list of possible tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg
CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla
# This diagnostic is just plain wrong in CUDA 9
# See https://github.com/kokkos/kokkos/issues/1470
CUDA_CXXFLAGS += -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla
# This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
CUDA_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
# disable deprecation warnings
CUDA_CXXFLAGS += -Xcudafe --diag_suppress=1444

ifeq ($(BUILD),Release)
    CUDA_CXXFLAGS += -O3 -DNDEBUG -Xcompiler -march=native,-mtune=native
@@ -84,20 +91,9 @@ endif

## options for clang as CUDA compiler
ifeq ($(CUDA_COMPILER),clang++)
CUDA_CPPFLAGS = $(CPPFLAGS)
# immediate expansion of the following two vars
    CUDA_CPPFLAGS := $(CPPFLAGS)
    CUDA_CXXFLAGS := $(CXXFLAGS)
CUDA_LDFLAGS := $(LDFLAGS)
# FIXME: better detection
_CUDA_PATH = $(dir $(subst -I,,$(shell pkg-config --cflags-only-I cuda)))
CUDA_CXXFLAGS += --cuda-path=$(_CUDA_PATH)
CUDA_LDFLAGS += -L $(_CUDA_PATH)/lib64/
ifeq ($(STATIC_LINKING),True)
CUDA_LDLIBS += -lcudart_static
else
CUDA_LDLIBS += -lcudart
endif
CUDA_LDLIBS += -ldl -lrt
    CUDA_CXXFLAGS += --cuda-path=$(CUDA_PATH)
    # include info for profiling
    #CUDA_CXXFLAGS += -Xcuda-fatbinary --cmdline -Xcuda-fatbinary " --generate-line-info " -Xcuda-ptxas --generate-line-info -g
    # debugging register spills
@@ -108,32 +104,36 @@ CUDA_LDLIBS += -ldl -lrt
    #CUDA_CXXFLAGS += -Xcuda-ptxas -dlcm=ca
endif

# select CUDA GPU architecture
# Append the GPU architecture flag to CUDA_CXXFLAGS.
# With CUDA_GPU_ARCH=auto the flag is obtained from the tnl-cuda-arch helper
# (called with --clang when clang++ is the CUDA compiler); any other value is
# passed verbatim using the option spelling of the selected compiler.
ifeq ($(CUDA_GPU_ARCH),auto)
    ifeq ($(CUDA_COMPILER),clang++)
        CUDA_CXXFLAGS += $(shell tnl-cuda-arch --clang)
    else
        CUDA_CXXFLAGS += $(shell tnl-cuda-arch)
    endif
else
    ifeq ($(CUDA_COMPILER),clang++)
        CUDA_CXXFLAGS += --cuda-gpu-arch=$(CUDA_GPU_ARCH)
    else
        CUDA_CXXFLAGS += --gpu-architecture=$(CUDA_GPU_ARCH)
    endif
endif


# append flags for TNL
PKGS = tnl
PKGS_CUDA = tnl-cuda
CXXFLAGS += $(shell pkg-config --cflags $(PKGS))
CUDA_CXXFLAGS += $(shell pkg-config --cflags $(PKGS_CUDA))
#ifeq ($(STATIC_LINKING),True)
## hack because cmake does not support shared and static libraries with the same name
#LDLIBS += $(subst tnl,tnl_static,$(shell pkg-config --libs $(PKGS)))
#CUDA_LDLIBS += $(subst tnl,tnl-cuda_static,$(shell pkg-config --libs $(PKGS_CUDA)))
#else
#LDLIBS += $(shell pkg-config --libs $(PKGS))
#CUDA_LDLIBS += $(shell pkg-config --libs $(PKGS_CUDA))
#endif
# TODO: add TNL as a git submodule
#TNL_INCLUDE_DIRS := -I $(mkfile_path)/libs/tnl/src/ -I $(mkfile_path)/libs/tnl/src/3rdparty/
TNL_INCLUDE_DIRS := -I ~/.local/include
CPPFLAGS += $(TNL_INCLUDE_DIRS)
CUDA_CPPFLAGS += $(TNL_INCLUDE_DIRS) -DHAVE_CUDA

# OpenMP
ifeq ($(CXX),clang++)
    OPENMP_CXXFLAGS := -fopenmp=libomp -DHAVE_OPENMP
    OPENMP_LDLIBS := -lomp
else ifeq ($(CXX),icpc)
    OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP
    OPENMP_LDLIBS := -liomp5
else
    OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP
    OPENMP_LDLIBS := -lgomp
@@ -158,10 +158,13 @@ endif


# base targets
.PHONY: all cuda host
all: cuda host
.PHONY: all cuda host mpi_host mpi_cuda mpi
all: cuda host mpi
host: $(TARGETS) subdirs
cuda: $(CUDA_TARGETS)
mpi_host: $(MPI_TARGETS)
mpi_cuda: $(MPI_CUDA_TARGETS)
mpi: mpi_host mpi_cuda

# descend into $(SUBDIRS) (when defined)
.PHONY: subdirs $(SUBDIRS)
@@ -177,7 +180,7 @@ ifdef SOURCES
	$(RM) $(SOURCES:%.cpp=%.o) $(SOURCES:%.cpp=%.d)
endif
ifdef CUDA_SOURCES
	$(RM) $(CUDA_SOURCES:%.cu=%.cu.o) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d)
	$(RM) $(CUDA_SOURCES:%.cu=%.cuo) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d)
endif
ifdef TARGETS
	$(RM) $(TARGETS)
@@ -185,7 +188,14 @@ endif
ifdef CUDA_TARGETS
	$(RM) $(CUDA_TARGETS)
endif
	$(RM) tmpxft_*.d
ifdef MPI_TARGETS
	$(RM) $(MPI_SOURCES:%.cpp=%_mpi.o) $(MPI_SOURCES:%.cpp=%_mpi.d)
	$(RM) $(MPI_TARGETS)
endif
ifdef MPI_CUDA_TARGETS
	$(RM) $(MPI_CUDA_TARGETS)
	$(RM) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo) $(MPI_CUDA_SOURCES:%.cu=%_mpi.d) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cu.d)
endif

# recursive cleanup
.PHONY: distclean
@@ -197,11 +207,30 @@ distclean: clean
$(TARGETS): % : %.o
	$(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS)

$(CUDA_TARGETS): % : %.cu.o
	$(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS)
# use .cuo instead of .cu.o to avoid problems with the implicit rules: https://stackoverflow.com/q/62967939
# (and use the host compiler for linking CUDA, nvcc does not understand that .cuo is an object file)
$(CUDA_TARGETS): % : %.cuo
	$(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS)

$(SOURCES:%.cpp=%.o): %.o: %.cpp
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<

$(CUDA_SOURCES:%.cu=%.cu.o): %.cu.o : %.cu
$(CUDA_SOURCES:%.cu=%.cuo): %.cuo : %.cu
	$(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -c -o $@ $<

# rules for MPI targets

# Linking: each MPI executable is linked by the host compiler from its single
# object file, with $(MPI_LDLIBS) appended after the regular libraries.
$(MPI_TARGETS): % : %.o
	$(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS) $(MPI_LDLIBS)

$(MPI_CUDA_TARGETS): % : %.cuo
	$(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) $(MPI_LDLIBS)

# Compilation: MPI objects carry the _mpi suffix (foo.cpp -> foo_mpi.o,
# foo.cu -> foo_mpi.cuo) and are built with additional MPI compiler flags.
$(MPI_SOURCES:%.cpp=%_mpi.o): %_mpi.o: %.cpp
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $<

# nvcc takes its MPI flags from $(MPI_NVCC_CXXFLAGS); any other CUDA compiler
# uses $(MPI_CXXFLAGS). The helper is a deferred (=) variable, so the selected
# flags are still expanded only when the recipe runs.
ifeq ($(CUDA_COMPILER),nvcc)
    mpi_cuda_extra_cxxflags = $(MPI_NVCC_CXXFLAGS)
else
    mpi_cuda_extra_cxxflags = $(MPI_CXXFLAGS)
endif

$(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo): %_mpi.cuo : %.cu
	$(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(mpi_cuda_extra_cxxflags) -c -o $@ $<

config.mk

0 → 100644
+30 −0
Original line number Diff line number Diff line
# All variables are also configurable from the command line (e.g. make TNL_CXX=clang++ )
# or from the environment (e.g. TNL_CXX=clang++ make ).
#
# If you are building targets in a subdirectory, you can create a local config.mk
# file in that subdirectory to override options from the global config.mk file.

# build type (can be 'Release' or 'Debug')
BUILD ?= Release

# compiler for the .cpp files
# Note that we have to use TNL_CXX instead of CXX to set the default value,
# because CXX is an implicit variable defined by GNU make itself. (Thank you!)
TNL_CXX ?= g++
CXX := $(TNL_CXX)

# compiler for the .cu files
CUDA_COMPILER ?= nvcc

# compiler for the host/CPU code used by nvcc
CUDA_HOST_COMPILER ?= $(CXX)

# path to the CUDA toolkit installation
# (autodetection is attempted, set it manually if it fails)
# The detection runs only when CUDA_PATH has not been set anywhere (command
# line, environment or an earlier makefile). It uses immediate (:=) assignment
# so the shell is invoked once; a plain "?=" would create a recursive variable
# and re-run the lookup on every expansion of $(CUDA_PATH).
ifeq ($(origin CUDA_PATH),undefined)
    # "command" is a shell builtin: the redirection forces GNU make to pass the
    # command line to the shell instead of trying to execute it directly (which
    # fails with some GNU make versions), and it also keeps the lookup quiet
    # when nvcc is not installed.
    _nvcc_path := $(shell command -v nvcc 2>/dev/null)
    # CUDA_PATH is the parent of the directory that contains nvcc; it is left
    # empty when nvcc was not found instead of silently resolving to "/".
    CUDA_PATH := $(if $(_nvcc_path),$(abspath $(dir $(_nvcc_path))/..))
endif
#$(info Detected CUDA_PATH: $(CUDA_PATH))

# CUDA GPU architecture (e.g. "sm_61" or "auto")
# (if you use "auto", tnl-cuda-arch must be installed in your $PATH)
CUDA_GPU_ARCH ?= auto
#CUDA_GPU_ARCH ?= sm_70