Updated Makefile based on tnl-mhfem (070e2096) · Commits · TNL / tnl-benchmark-mesh

Makefile

+2 −9

Original line number	Diff line number	Diff line
		@@ -19,8 +19,8 @@ host: tnl-benchmark-mesh
		tnl-benchmark-mesh: tnl-benchmark-mesh.o $(MESH_BENCHMARK_TEMPLATES_CPP:%.cpp=%.o)
		$(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)
		cuda: tnl-benchmark-mesh-cuda
		tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cu.o $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cu.o)
		$(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS)
		tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cuo $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cuo)
		$(CXX) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS)

		clean: clean_templates
		.PHONY: clean_templates
		@@ -29,11 +29,4 @@ clean_templates:
		$(RM) -r MeshBenchmarks.templates/

		-include $(SOURCES:%.cpp=%.d)

		ifeq ($(CUDA_COMPILER),nvcc)
		# nvcc creates .cu.d with rubbish and .d with the content we need
		-include $(CUDA_SOURCES:%.cu=%.d)
		else
		# clang creates .cu.d
		-include $(CUDA_SOURCES:%.cu=%.cu.d)
		endif

Makefile.base

+134 −105

Original line number	Diff line number	Diff line
		@@ -3,18 +3,6 @@
		#
		# vim: ft=make

		# variables configurable from command line (e.g. make TNL_CXX=clang++ )
		# or from environment (e.g. TNL_CXX=clang++ make )
		# Note that we have to use TNL_CXX instead of CXX to set the default value,
		# because CXX is an implicit variable defined by GNU make itself. (Thank you!)
		TNL_CXX ?= g++
		CXX := $(TNL_CXX)
		CUDA_COMPILER ?= nvcc
		CUDA_HOST_COMPILER ?= $(CXX)
		# BUILD can be 'Release' or 'Debug'
		BUILD ?= Release


		# relative paths used in the messages
		mkfile_path := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
		current_dir := ./$(subst $(mkfile_path),,$(CURDIR)/)
		@@ -23,6 +11,14 @@ current_dir := $(current_dir:./%/=%)
		# include colors
		include $(mkfile_path)/Makefile.colors

		# include global and local configs
		include $(mkfile_path)/config.mk
		ifneq ($(current_dir),./) # to skip double inclusion of the main config.mk
		ifneq ("$(wildcard $(CURDIR)/config.mk)","") # to check if the file exists
		include $(CURDIR)/config.mk
		endif
		endif

		# make does not have an OR statement...
		ifneq ($(filter $(BUILD),Release Debug),)
		$(info $(shell printf "$(COLOR_BOLD)### Building directory $(current_dir) in $(BUILD) mode$(COLOR_ALL_OFF)" ))
		@@ -31,14 +27,17 @@ $(error Wrong BUILD: $(BUILD))
		endif


		## essential host options
		CPPFLAGS = -MD -MP
		CXXFLAGS = -std=c++14 -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-deprecated -Wno-deprecated-declarations
		LDFLAGS = -pthread
		LDLIBS = -lstdc++fs
		ifneq ($(CXX),clang++)
		## essential host compiler flags
		CPPFLAGS := -MD -MP
		CXXFLAGS := -std=c++14 -Wall -Wno-unused-local-typedefs
		ifeq ($(CXX),g++)
		CXXFLAGS += -Wno-maybe-uninitialized
		endif
		## essential linker flags
		LDFLAGS := -pthread
		CUDA_LDFLAGS := $(LDFLAGS)
		LDLIBS := -lstdc++fs
		CUDA_LDLIBS := $(LDLIBS)

		ifeq ($(BUILD),Release)
		CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG
		@@ -47,6 +46,9 @@ CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG
		ifeq ($(CXX),clang++)
		CXXFLAGS += -flto=thin
		LDFLAGS += -O3 -march=native -mtune=native -flto=thin
		else ifeq ($(CXX),g++)
		CXXFLAGS += -flto
		LDFLAGS += -O3 -march=native -mtune=native -flto
		endif
		else ifeq ($(BUILD),Debug)
		CXXFLAGS += -Og -g
		@@ -60,21 +62,26 @@ LDFLAGS += -fuse-ld=lld
		endif


		## add flags for linking CUDA with the host compiler
		CUDA_LDFLAGS += -L $(CUDA_PATH)/lib64/
		ifeq ($(STATIC_LINKING),True)
		CUDA_LDLIBS += -lcudart_static
		else
		CUDA_LDLIBS += -lcudart
		endif
		CUDA_LDLIBS += -ldl -lrt

		## options for nvcc
		# automatic dependency generation for nvcc (gcc has it automated in the pre-processor phase with the -MD flag)
		# note that $@ is expanded only when $(CUDA_CPPFLAGS) is used
		CUDA_CPPFLAGS = --compiler-options -MD,-MP,-MT$@
		CUDA_CXXFLAGS = -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER)
		CUDA_LDLIBS = -lstdc++fs
		CUDA_CPPFLAGS := -MD -MP
		CUDA_CXXFLAGS := -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER)
		# enable nvcc features
		CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda --default-stream per-thread
		# disable false compiler warnings
		# reference for the -Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910
		# list of possible tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg
		CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla
		# This diagnostic is just plain wrong in CUDA 9
		# See https://github.com/kokkos/kokkos/issues/1470
		CUDA_CXXFLAGS += -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla
		# This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470
		CUDA_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
		# disable deprecation warnings
		CUDA_CXXFLAGS += -Xcudafe --diag_suppress=1444

		ifeq ($(BUILD),Release)
		CUDA_CXXFLAGS += -O3 -DNDEBUG -Xcompiler -march=native,-mtune=native
		@@ -84,20 +91,9 @@ endif

		## options for clang as CUDA compiler
		ifeq ($(CUDA_COMPILER),clang++)
		CUDA_CPPFLAGS = $(CPPFLAGS)
		# immediate expansion of the following two vars
		CUDA_CPPFLAGS := $(CPPFLAGS)
		CUDA_CXXFLAGS := $(CXXFLAGS)
		CUDA_LDFLAGS := $(LDFLAGS)
		# FIXME: better detection
		_CUDA_PATH = $(dir $(subst -I,,$(shell pkg-config --cflags-only-I cuda)))
		CUDA_CXXFLAGS += --cuda-path=$(_CUDA_PATH)
		CUDA_LDFLAGS += -L $(_CUDA_PATH)/lib64/
		ifeq ($(STATIC_LINKING),True)
		CUDA_LDLIBS += -lcudart_static
		else
		CUDA_LDLIBS += -lcudart
		endif
		CUDA_LDLIBS += -ldl -lrt
		CUDA_CXXFLAGS += --cuda-path=$(CUDA_PATH)
		# include info for profiling
		#CUDA_CXXFLAGS += -Xcuda-fatbinary --cmdline -Xcuda-fatbinary " --generate-line-info " -Xcuda-ptxas --generate-line-info -g
		# debugging register spills
		@@ -108,32 +104,36 @@ CUDA_LDLIBS += -ldl -lrt
		#CUDA_CXXFLAGS += -Xcuda-ptxas -dlcm=ca
		endif

		# select CUDA GPU architecture
		# set the CUDA GPU architecture
		ifeq ($(CUDA_COMPILER),clang++)
		ifeq ($(CUDA_GPU_ARCH),auto)
		CUDA_CXXFLAGS += $(shell tnl-cuda-arch --clang)
		else
		CUDA_CXXFLAGS += --cuda-gpu-arch=$(CUDA_GPU_ARCH)
		endif
		else
		ifeq ($(CUDA_GPU_ARCH),auto)
		CUDA_CXXFLAGS += $(shell tnl-cuda-arch)
		else
		CUDA_CXXFLAGS += --gpu-architecture=$(CUDA_GPU_ARCH)
		endif
		endif


		# append flags for TNL
		PKGS = tnl
		PKGS_CUDA = tnl-cuda
		CXXFLAGS += $(shell pkg-config --cflags $(PKGS))
		CUDA_CXXFLAGS += $(shell pkg-config --cflags $(PKGS_CUDA))
		#ifeq ($(STATIC_LINKING),True)
		## hack because cmake does not support shared and static libraries with the same name
		#LDLIBS += $(subst tnl,tnl_static,$(shell pkg-config --libs $(PKGS)))
		#CUDA_LDLIBS += $(subst tnl,tnl-cuda_static,$(shell pkg-config --libs $(PKGS_CUDA)))
		#else
		#LDLIBS += $(shell pkg-config --libs $(PKGS))
		#CUDA_LDLIBS += $(shell pkg-config --libs $(PKGS_CUDA))
		#endif
		# TODO: add TNL as a git submodule
		#TNL_INCLUDE_DIRS := -I $(mkfile_path)/libs/tnl/src/ -I $(mkfile_path)/libs/tnl/src/3rdparty/
		TNL_INCLUDE_DIRS := -I ~/.local/include
		CPPFLAGS += $(TNL_INCLUDE_DIRS)
		CUDA_CPPFLAGS += $(TNL_INCLUDE_DIRS) -DHAVE_CUDA

		# OpenMP
		ifeq ($(CXX),clang++)
		OPENMP_CXXFLAGS := -fopenmp=libomp -DHAVE_OPENMP
		OPENMP_LDLIBS := -lomp
		else ifeq ($(CXX),icpc)
		OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP
		OPENMP_LDLIBS := -liomp5
		else
		OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP
		OPENMP_LDLIBS := -lgomp
		@@ -158,10 +158,13 @@ endif


		# base targets
		.PHONY: all cuda host
		all: cuda host
		.PHONY: all cuda host mpi_host mpi_cuda mpi
		all: cuda host mpi
		host: $(TARGETS) subdirs
		cuda: $(CUDA_TARGETS)
		mpi_host: $(MPI_TARGETS)
		mpi_cuda: $(MPI_CUDA_TARGETS)
		mpi: mpi_host mpi_cuda

		# descend into $(SUBDIRS) (when defined)
		.PHONY: subdirs $(SUBDIRS)
		@@ -177,7 +180,7 @@ ifdef SOURCES
		$(RM) $(SOURCES:%.cpp=%.o) $(SOURCES:%.cpp=%.d)
		endif
		ifdef CUDA_SOURCES
		$(RM) $(CUDA_SOURCES:%.cu=%.cu.o) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d)
		$(RM) $(CUDA_SOURCES:%.cu=%.cuo) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d)
		endif
		ifdef TARGETS
		$(RM) $(TARGETS)
		@@ -185,7 +188,14 @@ endif
		ifdef CUDA_TARGETS
		$(RM) $(CUDA_TARGETS)
		endif
		$(RM) tmpxft_*.d
		ifdef MPI_TARGETS
		$(RM) $(MPI_SOURCES:%.cpp=%_mpi.o) $(MPI_SOURCES:%.cpp=%_mpi.d)
		$(RM) $(MPI_TARGETS)
		endif
		ifdef MPI_CUDA_TARGETS
		$(RM) $(MPI_CUDA_TARGETS)
		$(RM) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo) $(MPI_CUDA_SOURCES:%.cu=%_mpi.d) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cu.d)
		endif

		# recursive cleanup
		.PHONY: distclean
		@@ -197,11 +207,30 @@ distclean: clean
		$(TARGETS): % : %.o
		$(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS)

		$(CUDA_TARGETS): % : %.cu.o
		$(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS)
		# use .cuo instead of .cu.o to avoid problems with the implicit rules: https://stackoverflow.com/q/62967939
		# (and use the host compiler for linking CUDA, nvcc does not understand that .cuo is an object file)
		$(CUDA_TARGETS): % : %.cuo
		$(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS)

		$(SOURCES:%.cpp=%.o): %.o: %.cpp
		$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<

		$(CUDA_SOURCES:%.cu=%.cu.o): %.cu.o : %.cu
		$(CUDA_SOURCES:%.cu=%.cuo): %.cuo : %.cu
		$(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -c -o $@ $<

		# rules for MPI targets
		# The MPI_* variables used below are not set in this part of the file; they are
		# expected to be defined elsewhere.
		# MPI object files get an "_mpi" suffix (foo.cpp -> foo_mpi.o, foo.cu -> foo_mpi.cuo),
		# presumably so that they do not clash with the non-MPI objects built from the
		# same sources -- TODO confirm.

		# link an MPI host executable from the object file of the same name
		# (same as the plain host link rule, plus $(MPI_LDLIBS))
		$(MPI_TARGETS): % : %.o
		$(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS) $(MPI_LDLIBS)

		# link an MPI CUDA executable from its .cuo object with the host compiler
		# (same as the plain CUDA link rule, plus $(MPI_LDLIBS))
		$(MPI_CUDA_TARGETS): % : %.cuo
		$(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) $(MPI_LDLIBS)

		# compile a .cpp source into an MPI object: host flags plus $(MPI_CXXFLAGS)
		$(MPI_SOURCES:%.cpp=%_mpi.o): %_mpi.o: %.cpp
		$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $<

		# compile a .cu source into an MPI CUDA object
		# The conditional is evaluated when the makefile is parsed and selects the
		# recipe: nvcc gets $(MPI_NVCC_CXXFLAGS), any other CUDA compiler gets
		# $(MPI_CXXFLAGS).
		$(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo): %_mpi.cuo : %.cu
		ifeq ($(CUDA_COMPILER),nvcc)
		$(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(MPI_NVCC_CXXFLAGS) -c -o $@ $<
		else
		$(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $<
		endif

config.mk

0 → 100644

+30 −0

Original line number	Diff line number	Diff line
		# Global build configuration (included by Makefile.base).
		#
		# All variables are also configurable from the command line (e.g. make TNL_CXX=clang++ )
		# or from the environment (e.g. TNL_CXX=clang++ make ).
		#
		# If you are building targets in a subdirectory, you can create a local config.mk
		# file in that subdirectory to override options from the global config.mk file.
		# (The local file is included after this one, so it has to use "=" or ":=" there;
		# "?=" would be a no-op for the variables that already got their default here.)

		# build type (can be 'Release' or 'Debug')
		BUILD ?= Release

		# compiler for the .cpp files
		# Note that we have to use TNL_CXX instead of CXX to set the default value,
		# because CXX is an implicit variable that GNU make always predefines, so
		# "CXX ?= ..." would never take effect.
		TNL_CXX ?= g++
		CXX := $(TNL_CXX)

		# compiler for the .cu files
		CUDA_COMPILER ?= nvcc

		# compiler for the host/CPU code used by nvcc
		CUDA_HOST_COMPILER ?= $(CXX)

		# path to the CUDA toolkit installation
		# (autodetection is attempted, set it manually if it fails)
		#
		# The autodetection takes the directory one level above the nvcc executable
		# found in $PATH. It is skipped when CUDA_PATH comes from the command line, the
		# environment or an earlier makefile (same precedence as "?=").
		# Notes on the implementation:
		#  - ":=" is used so that the shell lookup runs once; a "?=" assignment is
		#    recursively expanded and would re-run it on every use of $(CUDA_PATH).
		#  - "2>/dev/null" silences the lookup and makes sure that make passes the
		#    command to the shell ("command" is a shell builtin, not an executable).
		#  - When nvcc is not found, CUDA_PATH is left empty instead of resolving to "/".
		ifeq ($(origin CUDA_PATH),undefined)
		nvcc_executable := $(shell command -v nvcc 2>/dev/null)
		ifneq ($(nvcc_executable),)
		CUDA_PATH := $(abspath $(dir $(nvcc_executable))/..)
		else
		CUDA_PATH :=
		endif
		endif
		#$(info Detected CUDA_PATH: $(CUDA_PATH))

		# CUDA GPU architecture (e.g. "sm_61" or "auto")
		# (if you use "auto", tnl-cuda-arch must be installed in your $PATH)
		CUDA_GPU_ARCH ?= auto
		#CUDA_GPU_ARCH ?= sm_70