From ec55ffe817e9a7b5140492d30128253457a0f427 Mon Sep 17 00:00:00 2001 From: Xuan Thang Nguyen <nguyexu2@fit.cvut.cz> Date: Tue, 2 Mar 2021 16:08:23 +0100 Subject: [PATCH] compilation --- quicksort/Makefile | 24 ++++++++++++++++++++ quicksort/config.mk | 49 +++++++++++++++++++++++++++++++++++++++++ quicksort/main.cu | 21 ++++++++++++++++++ quicksort/quicksort.cuh | 27 ++++++++++++++++++++++- 4 files changed, 120 insertions(+), 1 deletion(-) diff --git a/quicksort/Makefile b/quicksort/Makefile index e69de29..d301117 100644 --- a/quicksort/Makefile +++ b/quicksort/Makefile @@ -0,0 +1,24 @@ +include config.mk + +CUDA_SOURCES := $(wildcard *.cu) +CUDA_TARGETS := $(CUDA_SOURCES:%.cu=%) + +## targets definitions follow +.PHONY: all host cuda run +all: cuda +cuda: $(CUDA_TARGETS) + +run: cuda + ./$(CUDA_TARGETS) + +.PHONY: clean +clean: + rm -f *.d *.o *.cuo $(CUDA_TARGETS) + +# object files are plain .o here; naming them .cuo instead of .cu.o would avoid problems with the implicit rules (https://stackoverflow.com/q/62967939) but is not done +# (linking goes through CUDA_CXX, not the host compiler: the objects are built with -dc and -lcudadevrt is added for the kernel launches made from device code in quicksort.cuh) +$(CUDA_TARGETS): % : %.o + $(CUDA_CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) -lcudadevrt + +$(CUDA_SOURCES:%.cu=%.o): %.o : %.cu + $(CUDA_CXX) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -gencode arch=compute_52,code=sm_52 -dc -c -o $@ $< \ No newline at end of file diff --git a/quicksort/config.mk b/quicksort/config.mk index e69de29..3715986 100644 --- a/quicksort/config.mk +++ b/quicksort/config.mk @@ -0,0 +1,49 @@ +# configure the include path(s) according to your TNL installation +TNL_INCLUDE_DIRS := -I ~/.local/include + +WITH_OPENMP := no +WITH_DEBUG := no + +# If TNL is installed on your system, the CUDA architecture can be detected +# automatically by tnl-cuda-arch. This is done if CUDA_ARCH is set to "auto". +# Otherwise, CUDA_ARCH has to be set manually to the desired CUDA architecture +# number, e.g. 60, 61, etc. 
+CUDA_ARCH := auto + +# compilers +CXX := g++ +CUDA_CXX := nvcc + +# host compiler flags +CXXFLAGS := -std=c++14 $(TNL_INCLUDE_DIRS) +ifeq ($(WITH_DEBUG),yes) + CXXFLAGS += -O0 -g +else + CXXFLAGS += -O3 -DNDEBUG +endif + +# CUDA compiler flags +CUDA_CXXFLAGS := -std=c++14 --expt-relaxed-constexpr --expt-extended-lambda $(TNL_INCLUDE_DIRS) +CUDA_CXXFLAGS += -DHAVE_CUDA +ifeq ($(CUDA_ARCH),auto) + CUDA_CXXFLAGS += $(shell tnl-cuda-arch) +else + CUDA_CXXFLAGS += -gencode arch=compute_$(CUDA_ARCH),code=sm_$(CUDA_ARCH) +endif + +# determine path to the CUDA toolkit installation +# (autodetection is attempted, set it manually if it fails) +CUDA_PATH ?= $(abspath $(dir $(shell command -v nvcc))/..) +#$(info Detected CUDA_PATH: $(CUDA_PATH)) + +# flags for linking CUDA with the host compiler +CUDA_LDFLAGS := -L $(CUDA_PATH)/lib64 +CUDA_LDLIBS := -lcudart -ldl -lrt + +# enable OpenMP +ifeq ($(WITH_OPENMP),yes) + CXXFLAGS += -fopenmp -DHAVE_OPENMP + LDLIBS += -lgomp + CUDA_CXXFLAGS += -Xcompiler -fopenmp -DHAVE_OPENMP + CUDA_LDLIBS += -lgomp +endif diff --git a/quicksort/main.cu b/quicksort/main.cu index e69de29..fa69fd7 100644 --- a/quicksort/main.cu +++ b/quicksort/main.cu @@ -0,0 +1,21 @@ +#include <TNL/Containers/Array.h> +#include "quicksort.cuh" +#include "../util/algorithm.h" + +#include <iostream> +#include <algorithm> +using namespace std; + +int main() +{ + vector<int> vec(19); + for(auto & x : vec) x = rand()%30; + + TNL::Containers::Array<int, TNL::Devices::Cuda> arr(vec); + auto view = arr.getView(); + cout << view << endl; + quicksort(view); + cout << view << endl; + + return 0; +} \ No newline at end of file diff --git a/quicksort/quicksort.cuh b/quicksort/quicksort.cuh index d3b0e95..1b55bc0 100644 --- a/quicksort/quicksort.cuh +++ b/quicksort/quicksort.cuh @@ -4,7 +4,32 @@ __global__ void quicksortCuda(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> arr, int begin, int end) { - + if(begin + 1 >= end) //fewer than 2 elems in [begin, end): nothing to sort, avoids launching no-op child kernels + return; + + int pivotIdx = end - 1; + int pivot = 
arr[pivotIdx]; + + int midPoint = begin; //[begin ; midPoint) contain elems smaller than pivot + + //partition the array except for last elem (the pivot itself) + for(int i = begin; i + 1< end; i++) + { + if(arr[i] < pivot) + { + TNL::swap(arr[i], arr[midPoint]); + midPoint++; //increase boundary + } + } + + //put pivot onto its final position: now [begin, midPoint) < pivot and (midPoint, end) >= pivot (partitioned, not yet sorted) + TNL::swap(arr[midPoint], arr[pivotIdx]); + + //sorts all elems before midPoint(which is pivot now); NOTE(review): every recursion level is a nested device-side launch, so badly unbalanced input may exceed the device runtime nesting limit - confirm + quicksortCuda<<<1, 1>>>(arr, begin, midPoint); + + //sorts all elems after midPoint (none of them smaller than pivot) + quicksortCuda<<<1, 1>>>(arr, midPoint+1, end); } -- GitLab