Loading Makefile +2 −9 Original line number Diff line number Diff line Loading @@ -19,8 +19,8 @@ host: tnl-benchmark-mesh tnl-benchmark-mesh: tnl-benchmark-mesh.o $(MESH_BENCHMARK_TEMPLATES_CPP:%.cpp=%.o) $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) cuda: tnl-benchmark-mesh-cuda tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cu.o $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cu.o) $(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS) tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cuo $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cuo) $(CXX) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS) clean: clean_templates .PHONY: clean_templates Loading @@ -29,11 +29,4 @@ clean_templates: $(RM) -r MeshBenchmarks.templates/ -include $(SOURCES:%.cpp=%.d) ifeq ($(CUDA_COMPILER),nvcc) # nvcc creates .cu.d with rubbish and .d with the content we need -include $(CUDA_SOURCES:%.cu=%.d) else # clang creates .cu.d -include $(CUDA_SOURCES:%.cu=%.cu.d) endif Makefile.base +134 −105 Original line number Diff line number Diff line Loading @@ -3,18 +3,6 @@ # # vim: ft=make # variables configurable from command line (e.g. make TNL_CXX=clang++ ) # or from environment (e.g. TNL_CXX=clang++ make ) # Note that we have to use TNL_CXX instead of CXX to set the default value, # because CXX is an implicit variable defined by GNU make itself. (Thank you!) 
TNL_CXX ?= g++ CXX := $(TNL_CXX) CUDA_COMPILER ?= nvcc CUDA_HOST_COMPILER ?= $(CXX) # BUILD can be 'Release' or 'Debug' BUILD ?= Release # relative paths used in the messages mkfile_path := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) current_dir := ./$(subst $(mkfile_path),,$(CURDIR)/) Loading @@ -23,6 +11,14 @@ current_dir := $(current_dir:./%/=%) # include colors include $(mkfile_path)/Makefile.colors # include global and local configs include $(mkfile_path)/config.mk ifneq ($(current_dir),./) # to skip double inclusion of the main config.mk ifneq ("$(wildcard $(CURDIR)/config.mk)","") # to check if the file exists include $(CURDIR)/config.mk endif endif # make does not have an OR statement... ifneq ($(filter $(BUILD),Release Debug),) $(info $(shell printf "$(COLOR_BOLD)### Building directory $(current_dir) in $(BUILD) mode$(COLOR_ALL_OFF)" )) Loading @@ -31,14 +27,17 @@ $(error Wrong BUILD: $(BUILD)) endif ## essential host options CPPFLAGS = -MD -MP CXXFLAGS = -std=c++14 -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-deprecated -Wno-deprecated-declarations LDFLAGS = -pthread LDLIBS = -lstdc++fs ifneq ($(CXX),clang++) ## essential host compiler flags CPPFLAGS := -MD -MP CXXFLAGS := -std=c++14 -Wall -Wno-unused-local-typedefs ifeq ($(CXX),g++) CXXFLAGS += -Wno-maybe-uninitialized endif ## essential linker flags LDFLAGS := -pthread CUDA_LDFLAGS := $(LDFLAGS) LDLIBS := -lstdc++fs CUDA_LDLIBS := $(LDLIBS) ifeq ($(BUILD),Release) CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG Loading @@ -47,6 +46,9 @@ CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG ifeq ($(CXX),clang++) CXXFLAGS += -flto=thin LDFLAGS += -O3 -march=native -mtune=native -flto=thin else ifeq ($(CXX),g++) CXXFLAGS += -flto LDFLAGS += -O3 -march=native -mtune=native -flto endif else ifeq ($(BUILD),Debug) CXXFLAGS += -Og -g Loading @@ -60,21 +62,26 @@ LDFLAGS += -fuse-ld=lld endif ## add flags for linking CUDA with the host compiler CUDA_LDFLAGS += -L $(CUDA_PATH)/lib64/ ifeq 
($(STATIC_LINKING),True) CUDA_LDLIBS += -lcudart_static else CUDA_LDLIBS += -lcudart endif CUDA_LDLIBS += -ldl -lrt ## options for nvcc # automatic dependency generation for nvcc (gcc has it automated in the pre-processor phase with the -MD flag) # note that $@ is expanded only when $(CUDA_CPPFLAGS) is used CUDA_CPPFLAGS = --compiler-options -MD,-MP,-MT$@ CUDA_CXXFLAGS = -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER) CUDA_LDLIBS = -lstdc++fs CUDA_CPPFLAGS := -MD -MP CUDA_CXXFLAGS := -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER) # enable nvcc features CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda --default-stream per-thread # disable false compiler warnings # reference for the -Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910 # list of possible tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla # This diagnostic is just plain wrong in CUDA 9 # See https://github.com/kokkos/kokkos/issues/1470 CUDA_CXXFLAGS += -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla # This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470 CUDA_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored # disable deprecation warnings CUDA_CXXFLAGS += -Xcudafe --diag_suppress=1444 ifeq ($(BUILD),Release) CUDA_CXXFLAGS += -O3 -DNDEBUG -Xcompiler -march=native,-mtune=native Loading @@ -84,20 +91,9 @@ endif ## options for clang as CUDA 
compiler ifeq ($(CUDA_COMPILER),clang++) CUDA_CPPFLAGS = $(CPPFLAGS) # immediate expansion of the following two vars CUDA_CPPFLAGS := $(CPPFLAGS) CUDA_CXXFLAGS := $(CXXFLAGS) CUDA_LDFLAGS := $(LDFLAGS) # FIXME: better detection _CUDA_PATH = $(dir $(subst -I,,$(shell pkg-config --cflags-only-I cuda))) CUDA_CXXFLAGS += --cuda-path=$(_CUDA_PATH) CUDA_LDFLAGS += -L $(_CUDA_PATH)/lib64/ ifeq ($(STATIC_LINKING),True) CUDA_LDLIBS += -lcudart_static else CUDA_LDLIBS += -lcudart endif CUDA_LDLIBS += -ldl -lrt CUDA_CXXFLAGS += --cuda-path=$(CUDA_PATH) # include info for profiling #CUDA_CXXFLAGS += -Xcuda-fatbinary --cmdline -Xcuda-fatbinary " --generate-line-info " -Xcuda-ptxas --generate-line-info -g # debugging register spills Loading @@ -108,32 +104,36 @@ CUDA_LDLIBS += -ldl -lrt #CUDA_CXXFLAGS += -Xcuda-ptxas -dlcm=ca endif # select CUDA GPU architecture # set the CUDA GPU architecture ifeq ($(CUDA_COMPILER),clang++) ifeq ($(CUDA_GPU_ARCH),auto) CUDA_CXXFLAGS += $(shell tnl-cuda-arch --clang) else CUDA_CXXFLAGS += --cuda-gpu-arch=$(CUDA_GPU_ARCH) endif else ifeq ($(CUDA_GPU_ARCH),auto) CUDA_CXXFLAGS += $(shell tnl-cuda-arch) else CUDA_CXXFLAGS += --gpu-architecture=$(CUDA_GPU_ARCH) endif endif # append flags for TNL PKGS = tnl PKGS_CUDA = tnl-cuda CXXFLAGS += $(shell pkg-config --cflags $(PKGS)) CUDA_CXXFLAGS += $(shell pkg-config --cflags $(PKGS_CUDA)) #ifeq ($(STATIC_LINKING),True) ## hack because cmake does not support shared and static libraries with the same name #LDLIBS += $(subst tnl,tnl_static,$(shell pkg-config --libs $(PKGS))) #CUDA_LDLIBS += $(subst tnl,tnl-cuda_static,$(shell pkg-config --libs $(PKGS_CUDA))) #else #LDLIBS += $(shell pkg-config --libs $(PKGS)) #CUDA_LDLIBS += $(shell pkg-config --libs $(PKGS_CUDA)) #endif # TODO: add TNL as a git submodule #TNL_INCLUDE_DIRS := -I $(mkfile_path)/libs/tnl/src/ -I $(mkfile_path)/libs/tnl/src/3rdparty/ TNL_INCLUDE_DIRS := -I ~/.local/include CPPFLAGS += $(TNL_INCLUDE_DIRS) CUDA_CPPFLAGS += $(TNL_INCLUDE_DIRS) 
-DHAVE_CUDA # OpenMP ifeq ($(CXX),clang++) OPENMP_CXXFLAGS := -fopenmp=libomp -DHAVE_OPENMP OPENMP_LDLIBS := -lomp else ifeq ($(CXX),icpc) OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP OPENMP_LDLIBS := -liomp5 else OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP OPENMP_LDLIBS := -lgomp Loading @@ -158,10 +158,13 @@ endif # base targets .PHONY: all cuda host all: cuda host .PHONY: all cuda host mpi_host mpi_cuda mpi all: cuda host mpi host: $(TARGETS) subdirs cuda: $(CUDA_TARGETS) mpi_host: $(MPI_TARGETS) mpi_cuda: $(MPI_CUDA_TARGETS) mpi: mpi_host mpi_cuda # descend into $(SUBDIRS) (when defined) .PHONY: subdirs $(SUBDIRS) Loading @@ -177,7 +180,7 @@ ifdef SOURCES $(RM) $(SOURCES:%.cpp=%.o) $(SOURCES:%.cpp=%.d) endif ifdef CUDA_SOURCES $(RM) $(CUDA_SOURCES:%.cu=%.cu.o) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d) $(RM) $(CUDA_SOURCES:%.cu=%.cuo) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d) endif ifdef TARGETS $(RM) $(TARGETS) Loading @@ -185,7 +188,14 @@ endif ifdef CUDA_TARGETS $(RM) $(CUDA_TARGETS) endif $(RM) tmpxft_*.d ifdef MPI_TARGETS $(RM) $(MPI_SOURCES:%.cpp=%_mpi.o) $(MPI_SOURCES:%.cpp=%_mpi.d) $(RM) $(MPI_TARGETS) endif ifdef MPI_CUDA_TARGETS $(RM) $(MPI_CUDA_TARGETS) $(RM) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo) $(MPI_CUDA_SOURCES:%.cu=%_mpi.d) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cu.d) endif # recursive cleanup .PHONY: distclean Loading @@ -197,11 +207,30 @@ distclean: clean $(TARGETS): % : %.o $(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS) $(CUDA_TARGETS): % : %.cu.o $(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) # use .cuo instead of .cu.o to avoid problems with the implicit rules: https://stackoverflow.com/q/62967939 # (and use the host compiler for linking CUDA, nvcc does not understand that .cuo is an object file) $(CUDA_TARGETS): % : %.cuo $(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) $(SOURCES:%.cpp=%.o): %.o: %.cpp $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< $(CUDA_SOURCES:%.cu=%.cu.o): %.cu.o : %.cu $(CUDA_SOURCES:%.cu=%.cuo): %.cuo : %.cu 
$(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -c -o $@ $< # rules for MPI targets $(MPI_TARGETS): % : %.o $(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS) $(MPI_LDLIBS) $(MPI_CUDA_TARGETS): % : %.cuo $(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) $(MPI_LDLIBS) $(MPI_SOURCES:%.cpp=%_mpi.o): %_mpi.o: %.cpp $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $< $(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo): %_mpi.cuo : %.cu ifeq ($(CUDA_COMPILER),nvcc) $(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(MPI_NVCC_CXXFLAGS) -c -o $@ $< else $(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $< endif config.mk 0 → 100644 +30 −0 Original line number Diff line number Diff line # All variables are also configurable from the command line (e.g. make TNL_CXX=clang++ ) # or from the environment (e.g. TNL_CXX=clang++ make ). # # If you are building targets in a subdirectory, you can create a local config.mk # file in that subdirectory to override options from the global config.mk file. # build type (can be 'Release' or 'Debug') BUILD ?= Release # compiler for the .cpp files # Note that we have to use TNL_CXX instead of CXX to set the default value, # because CXX is an implicit variable defined by GNU make itself. (Thank you!) TNL_CXX ?= g++ CXX := $(TNL_CXX) # compiler for the .cu files CUDA_COMPILER ?= nvcc # compiler for the host/CPU code used by nvcc CUDA_HOST_COMPILER ?= $(CXX) # path to the CUDA toolkit installation # (autodetection is attempted, set it manually if it fails) CUDA_PATH ?= $(abspath $(dir $(shell command -v nvcc))/..) #$(info Detected CUDA_PATH: $(CUDA_PATH)) # CUDA GPU architecture (e.g. "sm_61" or "auto") # (if you use "auto", tnl-cuda-arch must be installed in your $PATH) CUDA_GPU_ARCH ?= auto #CUDA_GPU_ARCH ?= sm_70 Loading
Makefile +2 −9 Original line number Diff line number Diff line Loading @@ -19,8 +19,8 @@ host: tnl-benchmark-mesh tnl-benchmark-mesh: tnl-benchmark-mesh.o $(MESH_BENCHMARK_TEMPLATES_CPP:%.cpp=%.o) $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) cuda: tnl-benchmark-mesh-cuda tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cu.o $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cu.o) $(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS) tnl-benchmark-mesh-cuda: tnl-benchmark-mesh-cuda.cuo $(MESH_BENCHMARK_TEMPLATES_CU:%.cu=%.cuo) $(CXX) $(CUDA_LDFLAGS) -o $@ $^ $(CUDA_LDLIBS) clean: clean_templates .PHONY: clean_templates Loading @@ -29,11 +29,4 @@ clean_templates: $(RM) -r MeshBenchmarks.templates/ -include $(SOURCES:%.cpp=%.d) ifeq ($(CUDA_COMPILER),nvcc) # nvcc creates .cu.d with rubbish and .d with the content we need -include $(CUDA_SOURCES:%.cu=%.d) else # clang creates .cu.d -include $(CUDA_SOURCES:%.cu=%.cu.d) endif
Makefile.base +134 −105 Original line number Diff line number Diff line Loading @@ -3,18 +3,6 @@ # # vim: ft=make # variables configurable from command line (e.g. make TNL_CXX=clang++ ) # or from environment (e.g. TNL_CXX=clang++ make ) # Note that we have to use TNL_CXX instead of CXX to set the default value, # because CXX is an implicit variable defined by GNU make itself. (Thank you!) TNL_CXX ?= g++ CXX := $(TNL_CXX) CUDA_COMPILER ?= nvcc CUDA_HOST_COMPILER ?= $(CXX) # BUILD can be 'Release' or 'Debug' BUILD ?= Release # relative paths used in the messages mkfile_path := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) current_dir := ./$(subst $(mkfile_path),,$(CURDIR)/) Loading @@ -23,6 +11,14 @@ current_dir := $(current_dir:./%/=%) # include colors include $(mkfile_path)/Makefile.colors # include global and local configs include $(mkfile_path)/config.mk ifneq ($(current_dir),./) # to skip double inclusion of the main config.mk ifneq ("$(wildcard $(CURDIR)/config.mk)","") # to check if the file exists include $(CURDIR)/config.mk endif endif # make does not have an OR statement... 
ifneq ($(filter $(BUILD),Release Debug),) $(info $(shell printf "$(COLOR_BOLD)### Building directory $(current_dir) in $(BUILD) mode$(COLOR_ALL_OFF)" )) Loading @@ -31,14 +27,17 @@ $(error Wrong BUILD: $(BUILD)) endif ## essential host options CPPFLAGS = -MD -MP CXXFLAGS = -std=c++14 -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-deprecated -Wno-deprecated-declarations LDFLAGS = -pthread LDLIBS = -lstdc++fs ifneq ($(CXX),clang++) ## essential host compiler flags CPPFLAGS := -MD -MP CXXFLAGS := -std=c++14 -Wall -Wno-unused-local-typedefs ifeq ($(CXX),g++) CXXFLAGS += -Wno-maybe-uninitialized endif ## essential linker flags LDFLAGS := -pthread CUDA_LDFLAGS := $(LDFLAGS) LDLIBS := -lstdc++fs CUDA_LDLIBS := $(LDLIBS) ifeq ($(BUILD),Release) CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG Loading @@ -47,6 +46,9 @@ CXXFLAGS += -O3 -march=native -mtune=native -DNDEBUG ifeq ($(CXX),clang++) CXXFLAGS += -flto=thin LDFLAGS += -O3 -march=native -mtune=native -flto=thin else ifeq ($(CXX),g++) CXXFLAGS += -flto LDFLAGS += -O3 -march=native -mtune=native -flto endif else ifeq ($(BUILD),Debug) CXXFLAGS += -Og -g Loading @@ -60,21 +62,26 @@ LDFLAGS += -fuse-ld=lld endif ## add flags for linking CUDA with the host compiler CUDA_LDFLAGS += -L $(CUDA_PATH)/lib64/ ifeq ($(STATIC_LINKING),True) CUDA_LDLIBS += -lcudart_static else CUDA_LDLIBS += -lcudart endif CUDA_LDLIBS += -ldl -lrt ## options for nvcc # automatic dependency generation for nvcc (gcc has it automated in the pre-processor phase with the -MD flag) # note that $@ is expanded only when $(CUDA_CPPFLAGS) is used CUDA_CPPFLAGS = --compiler-options -MD,-MP,-MT$@ CUDA_CXXFLAGS = -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER) CUDA_LDLIBS = -lstdc++fs CUDA_CPPFLAGS := -MD -MP CUDA_CXXFLAGS := -std=c++14 --compiler-bindir $(CUDA_HOST_COMPILER) # enable nvcc features CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda --default-stream per-thread # disable false compiler warnings # reference for the 
-Xcudafe flag: http://stackoverflow.com/questions/14831051/how-to-disable-compiler-warnings-with-nvcc/17095910#17095910 # list of possible tokens: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg CUDA_CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla # This diagnostic is just plain wrong in CUDA 9 # See https://github.com/kokkos/kokkos/issues/1470 CUDA_CXXFLAGS += -Xcudafe "--diag_suppress=code_is_unreachable --diag_suppress=loop_not_reachable --diag_suppress=implicit_return_from_non_void_function --diag_suppress=unsigned_compare_with_zero --display_error_number" -Xcompiler -Wno-vla # This diagnostic is just plain wrong in CUDA 9 and later, see https://github.com/kokkos/kokkos/issues/1470 CUDA_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored # disable deprecation warnings CUDA_CXXFLAGS += -Xcudafe --diag_suppress=1444 ifeq ($(BUILD),Release) CUDA_CXXFLAGS += -O3 -DNDEBUG -Xcompiler -march=native,-mtune=native Loading @@ -84,20 +91,9 @@ endif ## options for clang as CUDA compiler ifeq ($(CUDA_COMPILER),clang++) CUDA_CPPFLAGS = $(CPPFLAGS) # immediate expansion of the following two vars CUDA_CPPFLAGS := $(CPPFLAGS) CUDA_CXXFLAGS := $(CXXFLAGS) CUDA_LDFLAGS := $(LDFLAGS) # FIXME: better detection _CUDA_PATH = $(dir $(subst -I,,$(shell pkg-config --cflags-only-I cuda))) CUDA_CXXFLAGS += --cuda-path=$(_CUDA_PATH) CUDA_LDFLAGS += -L $(_CUDA_PATH)/lib64/ ifeq ($(STATIC_LINKING),True) CUDA_LDLIBS += -lcudart_static else CUDA_LDLIBS += -lcudart endif CUDA_LDLIBS += -ldl -lrt CUDA_CXXFLAGS += --cuda-path=$(CUDA_PATH) # include info for profiling #CUDA_CXXFLAGS += -Xcuda-fatbinary --cmdline -Xcuda-fatbinary " --generate-line-info " -Xcuda-ptxas --generate-line-info -g # debugging 
register spills Loading @@ -108,32 +104,36 @@ CUDA_LDLIBS += -ldl -lrt #CUDA_CXXFLAGS += -Xcuda-ptxas -dlcm=ca endif # select CUDA GPU architecture # set the CUDA GPU architecture ifeq ($(CUDA_COMPILER),clang++) ifeq ($(CUDA_GPU_ARCH),auto) CUDA_CXXFLAGS += $(shell tnl-cuda-arch --clang) else CUDA_CXXFLAGS += --cuda-gpu-arch=$(CUDA_GPU_ARCH) endif else ifeq ($(CUDA_GPU_ARCH),auto) CUDA_CXXFLAGS += $(shell tnl-cuda-arch) else CUDA_CXXFLAGS += --gpu-architecture=$(CUDA_GPU_ARCH) endif endif # append flags for TNL PKGS = tnl PKGS_CUDA = tnl-cuda CXXFLAGS += $(shell pkg-config --cflags $(PKGS)) CUDA_CXXFLAGS += $(shell pkg-config --cflags $(PKGS_CUDA)) #ifeq ($(STATIC_LINKING),True) ## hack because cmake does not support shared and static libraries with the same name #LDLIBS += $(subst tnl,tnl_static,$(shell pkg-config --libs $(PKGS))) #CUDA_LDLIBS += $(subst tnl,tnl-cuda_static,$(shell pkg-config --libs $(PKGS_CUDA))) #else #LDLIBS += $(shell pkg-config --libs $(PKGS)) #CUDA_LDLIBS += $(shell pkg-config --libs $(PKGS_CUDA)) #endif # TODO: add TNL as a git submodule #TNL_INCLUDE_DIRS := -I $(mkfile_path)/libs/tnl/src/ -I $(mkfile_path)/libs/tnl/src/3rdparty/ TNL_INCLUDE_DIRS := -I ~/.local/include CPPFLAGS += $(TNL_INCLUDE_DIRS) CUDA_CPPFLAGS += $(TNL_INCLUDE_DIRS) -DHAVE_CUDA # OpenMP ifeq ($(CXX),clang++) OPENMP_CXXFLAGS := -fopenmp=libomp -DHAVE_OPENMP OPENMP_LDLIBS := -lomp else ifeq ($(CXX),icpc) OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP OPENMP_LDLIBS := -liomp5 else OPENMP_CXXFLAGS := -fopenmp -DHAVE_OPENMP OPENMP_LDLIBS := -lgomp Loading @@ -158,10 +158,13 @@ endif # base targets .PHONY: all cuda host all: cuda host .PHONY: all cuda host mpi_host mpi_cuda mpi all: cuda host mpi host: $(TARGETS) subdirs cuda: $(CUDA_TARGETS) mpi_host: $(MPI_TARGETS) mpi_cuda: $(MPI_CUDA_TARGETS) mpi: mpi_host mpi_cuda # descend into $(SUBDIRS) (when defined) .PHONY: subdirs $(SUBDIRS) Loading @@ -177,7 +180,7 @@ ifdef SOURCES $(RM) $(SOURCES:%.cpp=%.o) $(SOURCES:%.cpp=%.d) endif 
ifdef CUDA_SOURCES $(RM) $(CUDA_SOURCES:%.cu=%.cu.o) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d) $(RM) $(CUDA_SOURCES:%.cu=%.cuo) $(CUDA_SOURCES:%.cu=%.d) $(CUDA_SOURCES:%.cu=%.cu.d) endif ifdef TARGETS $(RM) $(TARGETS) Loading @@ -185,7 +188,14 @@ endif ifdef CUDA_TARGETS $(RM) $(CUDA_TARGETS) endif $(RM) tmpxft_*.d ifdef MPI_TARGETS $(RM) $(MPI_SOURCES:%.cpp=%_mpi.o) $(MPI_SOURCES:%.cpp=%_mpi.d) $(RM) $(MPI_TARGETS) endif ifdef MPI_CUDA_TARGETS $(RM) $(MPI_CUDA_TARGETS) $(RM) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo) $(MPI_CUDA_SOURCES:%.cu=%_mpi.d) $(MPI_CUDA_SOURCES:%.cu=%_mpi.cu.d) endif # recursive cleanup .PHONY: distclean Loading @@ -197,11 +207,30 @@ distclean: clean $(TARGETS): % : %.o $(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS) $(CUDA_TARGETS): % : %.cu.o $(CUDA_COMPILER) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) # use .cuo instead of .cu.o to avoid problems with the implicit rules: https://stackoverflow.com/q/62967939 # (and use the host compiler for linking CUDA, nvcc does not understand that .cuo is an object file) $(CUDA_TARGETS): % : %.cuo $(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) $(SOURCES:%.cpp=%.o): %.o: %.cpp $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< $(CUDA_SOURCES:%.cu=%.cu.o): %.cu.o : %.cu $(CUDA_SOURCES:%.cu=%.cuo): %.cuo : %.cu $(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -c -o $@ $< # rules for MPI targets $(MPI_TARGETS): % : %.o $(CXX) $(LDFLAGS) -o $@ $< $(LDLIBS) $(MPI_LDLIBS) $(MPI_CUDA_TARGETS): % : %.cuo $(CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) $(MPI_LDLIBS) $(MPI_SOURCES:%.cpp=%_mpi.o): %_mpi.o: %.cpp $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $< $(MPI_CUDA_SOURCES:%.cu=%_mpi.cuo): %_mpi.cuo : %.cu ifeq ($(CUDA_COMPILER),nvcc) $(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(MPI_NVCC_CXXFLAGS) -c -o $@ $< else $(CUDA_COMPILER) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) $(MPI_CXXFLAGS) -c -o $@ $< endif
# config.mk
#
# All variables are also configurable from the command line (e.g. make TNL_CXX=clang++ )
# or from the environment (e.g. TNL_CXX=clang++ make ).
#
# If you are building targets in a subdirectory, you can create a local config.mk
# file in that subdirectory to override options from the global config.mk file.

# build type (can be 'Release' or 'Debug')
BUILD ?= Release

# compiler for the .cpp files
# Note that we have to use TNL_CXX instead of CXX to set the default value,
# because CXX is an implicit variable defined by GNU make itself. (Thank you!)
TNL_CXX ?= g++
CXX := $(TNL_CXX)

# compiler for the .cu files
CUDA_COMPILER ?= nvcc

# compiler for the host/CPU code used by nvcc
CUDA_HOST_COMPILER ?= $(CXX)

# path to the CUDA toolkit installation
# (autodetection is attempted, set it manually if it fails)
#
# The detection runs only when CUDA_PATH was not set anywhere else (command
# line, environment, or an earlier makefile), which matches the semantics of
# the `?=` operator. Unlike `?=`, the result is stored with `:=`, so the shell
# lookup is executed once instead of on every expansion of $(CUDA_PATH).
ifeq ($(origin CUDA_PATH),undefined)
# `command` is a shell builtin; the redirection silences the lookup and also
# makes sure that make runs the line through the shell rather than trying to
# execute `command` directly.
_nvcc_path := $(shell command -v nvcc 2>/dev/null)
ifneq ($(strip $(_nvcc_path)),)
# nvcc is expected in <CUDA_PATH>/bin/, so the toolkit root is one level up
CUDA_PATH := $(abspath $(dir $(_nvcc_path))/..)
else
# nvcc is not in PATH: leave CUDA_PATH empty instead of letting
# $(abspath /..) silently resolve to the filesystem root "/"
CUDA_PATH :=
endif
endif
#$(info Detected CUDA_PATH: $(CUDA_PATH))

# CUDA GPU architecture (e.g. "sm_61" or "auto")
# (if you use "auto", tnl-cuda-arch must be installed in your $PATH)
CUDA_GPU_ARCH ?= auto
#CUDA_GPU_ARCH ?= sm_70