Commit ec55ffe8 authored by Xuan Thang Nguyen

compilation

parent 18c2699b
Makefile:

include config.mk
CUDA_SOURCES := $(wildcard *.cu)
CUDA_TARGETS := $(CUDA_SOURCES:%.cu=%)
## targets definitions follow
.PHONY: all host cuda run
all: cuda

cuda: $(CUDA_TARGETS)

# note: this assumes a single CUDA source file (and thus a single target)
run: cuda
	./$(CUDA_TARGETS)

.PHONY: clean
clean:
	rm -f *.d *.o *.cuo $(CUDA_TARGETS)
# use .cuo instead of .cu.o to avoid problems with the implicit rules: https://stackoverflow.com/q/62967939
# (note: the rules below use plain .o and link with $(CUDA_CXX), because objects
# compiled with -dc need a device-link step that the host compiler cannot perform)
$(CUDA_TARGETS): % : %.o
	$(CUDA_CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) -lcudadevrt

# note: -gencode for sm_52 is hardcoded here in addition to the -gencode flags
# already supplied via CUDA_CXXFLAGS from config.mk
$(CUDA_SOURCES:%.cu=%.o): %.o : %.cu
	$(CUDA_CXX) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -gencode arch=compute_52,code=sm_52 -dc -c -o $@ $<
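Typical usage of this Makefile: running "make" (or "make cuda") builds every *.cu file in the directory into an executable of the same name, "make run" builds and runs it, and "make clean" removes the build products.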
config.mk:

# configure the include path(s) according to your TNL installation
TNL_INCLUDE_DIRS := -I ~/.local/include
WITH_OPENMP := no
WITH_DEBUG := no
# If TNL is installed on your system, the CUDA architecture can be detected
# automatically by tnl-cuda-arch. This is done if CUDA_ARCH is set to "auto".
# Otherwise, CUDA_ARCH has to be set manually to the desired CUDA architecture
# number, e.g. 60, 61, etc.
CUDA_ARCH := auto
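# Example of a manual setting (hypothetical values, adjust to your GPU):
# a Pascal-generation GeForce GTX 1060 has compute capability 6.1, so you
# would set
#   CUDA_ARCH := 61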
# compilers
CXX := g++
CUDA_CXX := nvcc
# host compiler flags
CXXFLAGS := -std=c++14 $(TNL_INCLUDE_DIRS)
ifeq ($(WITH_DEBUG),yes)
CXXFLAGS += -O0 -g
else
CXXFLAGS += -O3 -DNDEBUG
endif
# CUDA compiler flags
CUDA_CXXFLAGS := -std=c++14 --expt-relaxed-constexpr --expt-extended-lambda $(TNL_INCLUDE_DIRS)
CUDA_CXXFLAGS += -DHAVE_CUDA
ifeq ($(CUDA_ARCH),auto)
CUDA_CXXFLAGS += $(shell tnl-cuda-arch)
else
CUDA_CXXFLAGS += -gencode arch=compute_$(CUDA_ARCH),code=sm_$(CUDA_ARCH)
endif
# determine path to the CUDA toolkit installation
# (autodetection is attempted, set it manually if it fails)
CUDA_PATH ?= $(abspath $(dir $(shell command -v nvcc))/..)
#$(info Detected CUDA_PATH: $(CUDA_PATH))
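# Example of a manual override if the autodetection fails (the path below is
# only a common default, an assumption to adjust for your system):
#   CUDA_PATH := /usr/local/cuda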
# flags for linking CUDA with the host compiler
CUDA_LDFLAGS := -L $(CUDA_PATH)/lib64
CUDA_LDLIBS := -lcudart -ldl -lrt
# enable OpenMP
ifeq ($(WITH_OPENMP),yes)
CXXFLAGS += -fopenmp -DHAVE_OPENMP
LDLIBS += -lgomp
CUDA_CXXFLAGS += -Xcompiler -fopenmp -DHAVE_OPENMP
CUDA_LDLIBS += -lgomp
endif
main.cu (assumed filename):

#include <iostream>
#include <vector>     // was missing in the original: needed for std::vector
#include <cstdlib>    // was missing in the original: needed for std::rand
#include <algorithm>

#include <TNL/Containers/Array.h>
#include "quicksort.cuh"
#include "../util/algorithm.h"

using namespace std;
int main()
{
    // fill a small host vector with random values
    vector<int> vec(19);
    for(auto& x : vec)
        x = rand() % 30;

    // copy the data to the GPU and get a view for the kernel
    TNL::Containers::Array<int, TNL::Devices::Cuda> arr(vec);
    auto view = arr.getView();

    cout << view << endl;   // unsorted input
    quicksort(view);
    cout << view << endl;   // sorted output
    return 0;
}
quicksort.cuh (filename assumed from the #include above; the diff adds the kernel body):

@@ -4,7 +4,32 @@
__global__ void quicksortCuda(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> arr, int begin, int end)
{
    // nothing to do for an empty range
    if(begin >= end)
        return;

    // Lomuto partition scheme: the last element is the pivot
    int pivotIdx = end - 1;
    int pivot = arr[pivotIdx];
    int midPoint = begin; // [begin; midPoint) contains elements smaller than the pivot

    // partition the array except for the last element (the pivot itself)
    for(int i = begin; i + 1 < end; i++)
    {
        if(arr[i] < pivot)
        {
            TNL::swap(arr[i], arr[midPoint]);
            midPoint++; // grow the "smaller than pivot" region
        }
    }

    // move the pivot to its final position: [begin, midPoint) holds smaller
    // elements, (midPoint, end) holds elements greater than or equal to it
    TNL::swap(arr[midPoint], arr[pivotIdx]);

    // recursively sort the elements before the pivot via dynamic parallelism
    quicksortCuda<<<1, 1>>>(arr, begin, midPoint);
    // recursively sort the elements after the pivot
    quicksortCuda<<<1, 1>>>(arr, midPoint + 1, end);
}
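main.cu calls a host-side quicksort() wrapper that lies outside the range of this diff. Below is a minimal sketch of what such a wrapper might look like; the function name matches the call in main.cu, but its actual body in quicksort.cuh is an assumption. Because quicksortCuda launches child kernels (CUDA dynamic parallelism), the translation unit must be compiled with -dc and linked against cudadevrt, which the Makefile above already does; note also that CUDA caps the nesting depth of child launches (24 levels by default), so this single-thread recursive scheme is only suitable for small inputs.

#include <TNL/Containers/ArrayView.h>
#include <TNL/Devices/Cuda.h>

// hypothetical wrapper (the real one is not shown in this commit):
// launch the recursive kernel over the whole view with a single thread,
// then wait for the entire recursion tree to finish
inline void quicksort(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> arr)
{
    quicksortCuda<<<1, 1>>>(arr, 0, arr.getSize());
    cudaDeviceSynchronize();
}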