From ec55ffe817e9a7b5140492d30128253457a0f427 Mon Sep 17 00:00:00 2001
From: Xuan Thang Nguyen <nguyexu2@fit.cvut.cz>
Date: Tue, 2 Mar 2021 16:08:23 +0100
Subject: [PATCH] add build setup and a naive quicksort kernel

---
 quicksort/Makefile      | 24 ++++++++++++++++++++
 quicksort/config.mk     | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 quicksort/main.cu       | 24 ++++++++++++++++++++++++
 quicksort/quicksort.cuh | 29 ++++++++++++++++++++++++++++-
 4 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/quicksort/Makefile b/quicksort/Makefile
index e69de29..d301117 100644
--- a/quicksort/Makefile
+++ b/quicksort/Makefile
@@ -0,0 +1,24 @@
+include config.mk
+
+CUDA_SOURCES := $(wildcard *.cu)
+CUDA_TARGETS := $(CUDA_SOURCES:%.cu=%)
+
+## targets definitions follow
+.PHONY: all host cuda run
+all: cuda
+cuda: $(CUDA_TARGETS)
+
+run: cuda
+	./$(CUDA_TARGETS)
+
+.PHONY: clean
+clean:
+	rm -f *.d *.o *.cuo $(CUDA_TARGETS)
+
+# compile each .cu into an object with relocatable device code (-dc), which the
+# recursive kernel launches (CUDA dynamic parallelism) require, and link with nvcc against cudadevrt
+$(CUDA_TARGETS): % : %.o
+	$(CUDA_CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) -lcudadevrt
+
+$(CUDA_SOURCES:%.cu=%.o): %.o : %.cu
+	$(CUDA_CXX) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -dc -o $@ $<
diff --git a/quicksort/config.mk b/quicksort/config.mk
index e69de29..3715986 100644
--- a/quicksort/config.mk
+++ b/quicksort/config.mk
@@ -0,0 +1,52 @@
+# configure the include path(s) according to your TNL installation
+TNL_INCLUDE_DIRS := -I ~/.local/include
+
+WITH_OPENMP := no
+WITH_DEBUG := no
+
+# If TNL is installed on your system, the CUDA architecture can be detected
+# automatically by tnl-cuda-arch. This is done if CUDA_ARCH is set to "auto".
+# Otherwise, CUDA_ARCH has to be set manually to the desired CUDA architecture
+# number, e.g. 60, 61, etc.
+CUDA_ARCH := auto
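+#CUDA_ARCH := 61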
+
+# compilers
+CXX := g++
+CUDA_CXX := nvcc
+
+# host compiler flags
+CXXFLAGS := -std=c++14 $(TNL_INCLUDE_DIRS)
+ifeq ($(WITH_DEBUG),yes)
+    CXXFLAGS += -O0 -g
+else
+    CXXFLAGS += -O3 -DNDEBUG
+endif
+
+# CUDA compiler flags
+CUDA_CXXFLAGS := -std=c++14 --expt-relaxed-constexpr --expt-extended-lambda $(TNL_INCLUDE_DIRS)
+CUDA_CXXFLAGS += -DHAVE_CUDA
+ifeq ($(CUDA_ARCH),auto)
+    CUDA_CXXFLAGS += $(shell tnl-cuda-arch)
+else
+    CUDA_CXXFLAGS += -gencode arch=compute_$(CUDA_ARCH),code=sm_$(CUDA_ARCH)
+endif
+
+# determine path to the CUDA toolkit installation
+# (autodetection is attempted, set it manually if it fails)
+CUDA_PATH ?= $(abspath $(dir $(shell command -v nvcc))/..)
+#$(info Detected CUDA_PATH: $(CUDA_PATH))
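+# for example, a typical manual setting (adjust to your installation):
+#CUDA_PATH := /usr/local/cuda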
+
+# flags for linking CUDA with the host compiler
+CUDA_LDFLAGS := -L $(CUDA_PATH)/lib64
+CUDA_LDLIBS := -lcudart -ldl -lrt
+
+# enable OpenMP
+ifeq ($(WITH_OPENMP),yes)
+    CXXFLAGS += -fopenmp -DHAVE_OPENMP
+    LDLIBS += -lgomp
+    CUDA_CXXFLAGS += -Xcompiler -fopenmp -DHAVE_OPENMP
+    CUDA_LDLIBS += -lgomp
+endif
diff --git a/quicksort/main.cu b/quicksort/main.cu
index e69de29..fa69fd7 100644
--- a/quicksort/main.cu
+++ b/quicksort/main.cu
@@ -0,0 +1,24 @@
+#include <TNL/Containers/Array.h>
+#include "quicksort.cuh"
+#include "../util/algorithm.h"
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+#include <cstdlib>   // rand()
+using namespace std;
+
+int main()
+{
+    vector<int> vec(19);
+    for(auto & x : vec) x = rand()%30;
+
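+    // copy the host data into a TNL array on the GPU; quicksort sorts the view in place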
+    TNL::Containers::Array<int, TNL::Devices::Cuda> arr(vec);
+    auto view = arr.getView();
+    cout << view << endl;
+    quicksort(view);
+    cout << view << endl;
+
+    return 0;
+}
diff --git a/quicksort/quicksort.cuh b/quicksort/quicksort.cuh
index d3b0e95..1b55bc0 100644
--- a/quicksort/quicksort.cuh
+++ b/quicksort/quicksort.cuh
@@ -4,7 +4,34 @@
 
 __global__ void quicksortCuda(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> arr, int begin, int end)
 {
-    
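+    // naive sequential quicksort: a single thread partitions [begin, end) around the last
+    // element and sorts the two halves by launching child kernels (CUDA dynamic parallelism)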
+    if(begin >= end)
+        return;
+
+    int pivotIdx = end - 1;
+    int pivot = arr[pivotIdx];
+
+    int midPoint = begin; // [begin, midPoint) holds the elements smaller than the pivot
+
+    // partition the range, skipping the last element (the pivot itself)
+    for(int i = begin; i + 1 < end; i++)
+    {
+        if(arr[i] < pivot)
+        {
+            TNL::swap(arr[i], arr[midPoint]);
+            midPoint++; //increase boundary
+        }
+    }
+
+    // move the pivot into its final position; arr[midPoint] is now correctly placed
+    TNL::swap(arr[midPoint], arr[pivotIdx]);
+
+    // sort all elements before midPoint (where the pivot now sits)
+    quicksortCuda<<<1, 1>>>(arr, begin, midPoint);
+
+    // sort all elements after (greater than) the pivot at midPoint
+    quicksortCuda<<<1, 1>>>(arr, midPoint + 1, end);
 
 }
 
-- 
GitLab