Commit 5b7045ed authored by Xuan Thang Nguyen

support for structure sorting

parent a8d88571
+1 −1
@@ -39,7 +39,7 @@ __device__ Value pickPivot(TNL::Containers::ArrayView<Value, Device> src, const
 }
 
 template <typename Value, typename Device, typename Function>
-__device__ Value pickPivotIdx(TNL::Containers::ArrayView<Value, Device> src, const Function &Cmp)
+__device__ int pickPivotIdx(TNL::Containers::ArrayView<Value, Device> src, const Function &Cmp)
 {
     //return 0;
     //return src.getSize()-1;
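Side note on the pickPivotIdx signature fix: the function returns a position, so the old Value return type was simply wrong, and returning an int also avoids copying a whole element out of the array once Value is a multi-byte struct like the one added in the tests below. The body is cut off above; purely as an illustration, an index-returning median-of-three picker could look like this (the name pickPivotIdxSketch and its body are assumptions, not code from this commit):

// Hypothetical sketch of an index-returning pivot picker.
template <typename Value, typename Device, typename Function>
__device__ int pickPivotIdxSketch(TNL::Containers::ArrayView<Value, Device> src,
                                  const Function &Cmp)
{
    const int a = 0, b = src.getSize() / 2, c = src.getSize() - 1;
    // Return the index of the median of the first, middle and last element.
    if (Cmp(src[a], src[b]))
        return Cmp(src[b], src[c]) ? b : (Cmp(src[a], src[c]) ? c : a);
    else
        return Cmp(src[a], src[c]) ? a : (Cmp(src[b], src[c]) ? c : b);
}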
+2 −13
@@ -75,15 +75,12 @@ __global__ void cudaQuickSort1stPhase(ArrayView<Value, Devices::Cuda> arr, Array
     extern __shared__ int externMem[];
     Value *sharedMem = (Value *)externMem;
 
-    static __shared__ Value pivot;
 
     TASK &myTask = tasks[taskMapping[blockIdx.x]];
     auto &src = (myTask.depth & 1) == 0 ? arr : aux;
     auto &dst = (myTask.depth & 1) == 0 ? aux : arr;
 
-    if (threadIdx.x == 0)
-        pivot = src[myTask.pivotIdx];
-    __syncthreads();
+    Value pivot = src[myTask.pivotIdx];
 
     cudaPartition<Value, Function, useShared>(
         src.getView(myTask.partitionBegin, myTask.partitionEnd),
@@ -99,17 +96,9 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va
                                ArrayView<TASK, Devices::Cuda> tasks, ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt,
                                ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt)
 {
-    static __shared__ Value pivot;
     TASK &myTask = tasks[blockIdx.x];
 
-    if (threadIdx.x == 0)
-    {
-        if ((myTask.depth & 1) == 0)
-            pivot = arr[myTask.pivotIdx];
-        else
-            pivot = aux[myTask.pivotIdx];
-    }
-    __syncthreads();
+    Value pivot = (myTask.depth & 1) == 0 ? arr[myTask.pivotIdx] : aux[myTask.pivotIdx];
 
     int leftBegin = myTask.partitionBegin, leftEnd = myTask.partitionBegin + myTask.dstBegin;
     int rightBegin = myTask.partitionBegin + myTask.dstEnd, rightEnd = myTask.partitionEnd;
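These two hunks are the core of the commit: a static __shared__ Value pivot only compiles while Value is trivially constructible, because nvcc forbids dynamic initialization of __shared__ variables, and any user-provided default constructor (such as TMPSTRUCT's in the new test) triggers exactly that. Replacing the shared broadcast with a per-thread local copy lifts the restriction and also drops a __syncthreads(). A minimal sketch of the constraint, with illustrative names (Elem, partitionSketch) that are not from the repository:

struct Elem {
    int key;
    __device__ Elem() { key = 0; }  // user-provided, hence non-trivial, default ctor
};

__global__ void partitionSketch(const Elem *src, int pivotIdx)
{
    //static __shared__ Elem pivot;  // nvcc: "initialization is not supported
                                     //        for __shared__ variables"

    // A per-thread copy works for any copyable type; since every thread loads
    // the same address, the memory system serves the read as a broadcast anyway.
    Elem pivot = src[pivotIdx];
    (void)pivot;  // partitioning against the pivot would follow here
}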
+18 −7
@@ -17,6 +17,14 @@ __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src,
     bitonicSort_Block(src, dst, sharedMem, Cmp);
 }
 
+template <typename Value, typename Function>
+__device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src,
+                           ArrayView<Value, TNL::Devices::Cuda> dst,
+                           const Function &Cmp)
+{
+    bitonicSort_Block(src, dst, Cmp);
+}
+
 template <int stackSize>
 __device__ void stackPush(int stackArrBegin[], int stackArrEnd[],
                           int stackDepth[], int &stackTop,
@@ -79,7 +87,11 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr,
     if (arr.getSize() <= blockDim.x * 2)
     {
         auto src = (_depth & 1) == 0 ? arr : aux;
-        externSort<Value, Function>(src, arr, Cmp, sharedMem);
+        if(useShared)
+            externSort<Value, Function>(src, arr, Cmp, sharedMem);
+        else
+            externSort<Value, Function>(src, arr, Cmp);
 
         return;
     }
 
@@ -87,7 +99,6 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr,
     static __shared__ int stackArrBegin[stackSize], stackArrEnd[stackSize], stackDepth[stackSize];
     static __shared__ int begin, end, depth;
     static __shared__ int pivotBegin, pivotEnd;
-    static __shared__ Value pivot;
 
     if (threadIdx.x == 0)
     {
@@ -117,16 +128,16 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr,
         //small enough for bitonic
         if (size <= blockDim.x * 2)
         {
-            externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem);
+            if(useShared)
+                externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem);
+            else
+                externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp);
             __syncthreads();
             continue;
         }
         //------------------------------------------------------
 
         //actually do partitioning from here on out
-        if (threadIdx.x == 0)
-            pivot = pickPivot(src.getView(begin, end), Cmp);
-        __syncthreads();
+        Value pivot = pickPivot(src.getView(begin, end), Cmp);
 
         int smaller = 0, bigger = 0;
         countElem(src.getView(begin, end), Cmp, smaller, bigger, pivot);
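The useShared split exists because the bitonic staging buffer holds up to 2 * blockDim.x elements of type Value, which can outgrow the shared-memory budget for larger structs; in that case the new three-argument externSort overload sorts through global memory instead. As a sketch of the size check a launcher could apply when instantiating useShared (fitsInSharedMem is an assumed helper, not code from this commit):

template <typename Value>
bool fitsInSharedMem(int elemsPerBlock, std::size_t sharedMemPerBlock)
{
    // elemsPerBlock is 2 * blockDim.x for the externSort staging buffer;
    // sharedMemPerBlock is the per-block budget, commonly 48 KiB.
    return elemsPerBlock * sizeof(Value) <= sharedMemPerBlock;
}

With the 16-byte TMPSTRUCT from the new test and 512 threads per block the buffer is 16 KiB and the shared path still works; a 64-byte element type at the same block size would already need 64 KiB and has to take the global-memory path.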
+22 −0
@@ -141,6 +141,28 @@ TEST(types, type_double)
     ASSERT_TRUE(view == cudaArr2.getView());
 }
 
+struct TMPSTRUCT{
+    uint8_t m_data[16];
+
+    __cuda_callable__ TMPSTRUCT(){m_data[0] = 0;}
+    __cuda_callable__ TMPSTRUCT(int first){m_data[0] = first;}
+    __cuda_callable__ bool operator <(const TMPSTRUCT& other) const { return m_data[0] < other.m_data[0]; }
+};
+
+TEST(types, struct)
+{
+    std::srand(8451);
+
+    int size = (1 << 13);
+    std::vector<TMPSTRUCT> arr(size);
+    for (auto &x : arr) x = TMPSTRUCT(std::rand());
+
+    TNL::Containers::Array<TMPSTRUCT, TNL::Devices::Cuda> cudaArr(arr);
+    auto view = cudaArr.getView();
+    quicksort(view);
+}
+
 //----------------------------------------------------------------------------------
 
 int main(int argc, char **argv)
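The new TEST(types, struct) only proves that quicksort compiles and runs for a non-arithmetic element type; it never asserts that the result is ordered. A follow-up check could copy the data back and test monotonicity, roughly like this (a sketch appended to the test body, assuming TNL's cross-device Array assignment; not part of the commit):

// continuing inside TEST(types, struct), after quicksort(view):
TNL::Containers::Array<TMPSTRUCT, TNL::Devices::Host> hostArr;
hostArr = cudaArr;  // device-to-host copy
for (int i = 1; i < hostArr.getSize(); i++)
    ASSERT_FALSE(hostArr[i] < hostArr[i - 1]);  // non-decreasing under operator<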