Commit df618fde authored by Xuan Thang Nguyen
Browse files

Use a block-wide prefix sum (blockPrefixSum) instead of a reduction (blockReduceSum) in cudaPartition

parent 9328c524
Loading
Loading
Loading
Loading
4 additions, 6 deletions (+4 −6)
Original line number Diff line number Diff line
@@ -47,19 +47,17 @@ __global__ void cudaPartition(CudaArrayView arr, int begin, int end,
    int smaller = 0, bigger = 0;
    cmpElem(arr, myBegin, myEnd, pivot, smaller, bigger);

    int smallerOffset = blockReduceSum(smaller);
    int biggerOffset = blockReduceSum(bigger);
    int smallerOffset = blockPrefixSum(smaller);
    int biggerOffset = blockPrefixSum(bigger);

    if (threadIdx.x == 0)
    if (threadIdx.x == blockDim.x - 1)
    {
        *smallerStart = atomicAdd(auxBeginIdx, smallerOffset);
        *biggerStart = atomicAdd(auxEndIdx, -biggerOffset) - biggerOffset;
    }
    __syncthreads();

    int auxThreadSmallerBegin = atomicAdd(smallerStart, smaller);
    int auxThreadBiggerBegin = atomicAdd(biggerStart, bigger);
    copyData(arr, myBegin, myEnd, pivot, aux, auxThreadSmallerBegin, auxThreadBiggerBegin);
    copyData(arr, myBegin, myEnd, pivot, aux, (*smallerStart) + smallerOffset - smaller, (*biggerStart) + biggerOffset - bigger);
    __syncthreads();
    
    //inserts pivot