Commit ec55ffe8 authored by Xuan Thang Nguyen

compilation

parent 18c2699b
Makefile:

include config.mk
CUDA_SOURCES := $(wildcard *.cu)
CUDA_TARGETS := $(CUDA_SOURCES:%.cu=%)
## targets definitions follow
.PHONY: all host cuda run
all: cuda

cuda: $(CUDA_TARGETS)

# note: this assumes a single CUDA source file (and thus a single target)
run: cuda
	./$(CUDA_TARGETS)

.PHONY: clean
clean:
	rm -f *.d *.o *.cuo $(CUDA_TARGETS)
# use .cuo instead of .cu.o to avoid problems with the implicit rules: https://stackoverflow.com/q/62967939
# (note: the rules below use plain .o and link with $(CUDA_CXX), because objects
# compiled with -dc need a device-link step that the host compiler cannot perform)
$(CUDA_TARGETS): % : %.o
	$(CUDA_CXX) $(CUDA_LDFLAGS) -o $@ $< $(CUDA_LDLIBS) -lcudadevrt

# note: -gencode for sm_52 is hardcoded here in addition to the -gencode flags
# already supplied via CUDA_CXXFLAGS from config.mk
$(CUDA_SOURCES:%.cu=%.o): %.o : %.cu
	$(CUDA_CXX) $(CUDA_CPPFLAGS) $(CUDA_CXXFLAGS) -gencode arch=compute_52,code=sm_52 -dc -c -o $@ $<
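Typical usage of this Makefile: running "make" (or "make cuda") builds every *.cu file in the directory into an executable of the same name, "make run" builds and runs it, and "make clean" removes the build products.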
config.mk:

# configure the include path(s) according to your TNL installation
TNL_INCLUDE_DIRS := -I ~/.local/include
WITH_OPENMP := no
WITH_DEBUG := no
# If TNL is installed on your system, the CUDA architecture can be detected
# automatically by tnl-cuda-arch. This is done if CUDA_ARCH is set to "auto".
# Otherwise, CUDA_ARCH has to be set manually to the desired CUDA architecture
# number, e.g. 60, 61, etc.
CUDA_ARCH := auto
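# Example of a manual setting (hypothetical values, adjust to your GPU):
# a Pascal-generation GeForce GTX 1060 has compute capability 6.1, so you
# would set
#   CUDA_ARCH := 61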
# compilers
CXX := g++
CUDA_CXX := nvcc
# host compiler flags
CXXFLAGS := -std=c++14 $(TNL_INCLUDE_DIRS)
ifeq ($(WITH_DEBUG),yes)
CXXFLAGS += -O0 -g
else
CXXFLAGS += -O3 -DNDEBUG
endif
# CUDA compiler flags
CUDA_CXXFLAGS := -std=c++14 --expt-relaxed-constexpr --expt-extended-lambda $(TNL_INCLUDE_DIRS)
CUDA_CXXFLAGS += -DHAVE_CUDA
ifeq ($(CUDA_ARCH),auto)
CUDA_CXXFLAGS += $(shell tnl-cuda-arch)
else
CUDA_CXXFLAGS += -gencode arch=compute_$(CUDA_ARCH),code=sm_$(CUDA_ARCH)
endif
# determine path to the CUDA toolkit installation
# (autodetection is attempted, set it manually if it fails)
CUDA_PATH ?= $(abspath $(dir $(shell command -v nvcc))/..)
#$(info Detected CUDA_PATH: $(CUDA_PATH))
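# Example of a manual override if the autodetection fails (the path below is
# only a common default, an assumption to adjust for your system):
#   CUDA_PATH := /usr/local/cuda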
# flags for linking CUDA with the host compiler
CUDA_LDFLAGS := -L $(CUDA_PATH)/lib64
CUDA_LDLIBS := -lcudart -ldl -lrt
# enable OpenMP
ifeq ($(WITH_OPENMP),yes)
CXXFLAGS += -fopenmp -DHAVE_OPENMP
LDLIBS += -lgomp
CUDA_CXXFLAGS += -Xcompiler -fopenmp -DHAVE_OPENMP
CUDA_LDLIBS += -lgomp
endif
main.cu (assumed filename):

#include <iostream>
#include <vector>     // was missing in the original: needed for std::vector
#include <cstdlib>    // was missing in the original: needed for std::rand
#include <algorithm>

#include <TNL/Containers/Array.h>
#include "quicksort.cuh"
#include "../util/algorithm.h"

using namespace std;
int main()
{
    // fill a small host vector with random values
    vector<int> vec(19);
    for(auto& x : vec)
        x = rand() % 30;

    // copy the data to the GPU and get a view for the kernel
    TNL::Containers::Array<int, TNL::Devices::Cuda> arr(vec);
    auto view = arr.getView();

    cout << view << endl;   // unsorted input
    quicksort(view);
    cout << view << endl;   // sorted output
    return 0;
}
quicksort.cuh (filename assumed from the #include above; the diff adds the kernel body):

@@ -4,7 +4,32 @@
__global__ void quicksortCuda(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> arr, int begin, int end)
{
    // nothing to do for an empty range
    if(begin >= end)
        return;

    // Lomuto partition scheme: the last element is the pivot
    int pivotIdx = end - 1;
    int pivot = arr[pivotIdx];
    int midPoint = begin; // [begin; midPoint) contains elements smaller than the pivot

    // partition the array except for the last element (the pivot itself)
    for(int i = begin; i + 1 < end; i++)
    {
        if(arr[i] < pivot)
        {
            TNL::swap(arr[i], arr[midPoint]);
            midPoint++; // grow the "smaller than pivot" region
        }
    }

    // move the pivot to its final position: [begin, midPoint) holds smaller
    // elements, (midPoint, end) holds elements greater than or equal to it
    TNL::swap(arr[midPoint], arr[pivotIdx]);

    // recursively sort the elements before the pivot via dynamic parallelism
    quicksortCuda<<<1, 1>>>(arr, begin, midPoint);
    // recursively sort the elements after the pivot
    quicksortCuda<<<1, 1>>>(arr, midPoint + 1, end);
}
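main.cu calls a host-side quicksort() wrapper that lies outside the range of this diff. Below is a minimal sketch of what such a wrapper might look like; the function name matches the call in main.cu, but its actual body in quicksort.cuh is an assumption. Because quicksortCuda launches child kernels (CUDA dynamic parallelism), the translation unit must be compiled with -dc and linked against cudadevrt, which the Makefile above already does; note also that CUDA caps the nesting depth of child launches (24 levels by default), so this single-thread recursive scheme is only suitable for small inputs.

#include <TNL/Containers/ArrayView.h>
#include <TNL/Devices/Cuda.h>

// hypothetical wrapper (the real one is not shown in this commit):
// launch the recursive kernel over the whole view with a single thread,
// then wait for the entire recursion tree to finish
inline void quicksort(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> arr)
{
    quicksortCuda<<<1, 1>>>(arr, 0, arr.getSize());
    cudaDeviceSynchronize();
}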