Commit f6a68a97 authored by Xuan Thang Nguyen
Browse files

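Rename prefixSum to inclusivePrefixSum (warpPrefixSum → warpInclusivePrefixSum, blockPrefixSum → blockInclusivePrefixSum); the block scan now subtracts tmp2 instead of re-reading shared[lane]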

parent af5ad08b
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -49,8 +49,8 @@ __global__ void cudaPartition(CudaArrayView arr, int begin, int end,
    int smaller = 0, bigger = 0;
    cmpElem(arr, myBegin, myEnd, pivot, smaller, bigger);

    int smallerOffset = blockPrefixSum(smaller);
    int biggerOffset = blockPrefixSum(bigger);
    int smallerOffset = blockInclusivePrefixSum(smaller);
    int biggerOffset = blockInclusivePrefixSum(bigger);

    if (threadIdx.x == blockDim.x - 1)
    {
+4 −4
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ __device__ int blockReduceSum(int val)
    return shared[0];
}

__device__ int warpPrefixSum(int value)
__device__ int warpInclusivePrefixSum(int value)
{
    int laneId = threadIdx.x & 0x1f;
    for (int i = 1; i*2 <= warpSize; i *= 2)
@@ -49,13 +49,13 @@ __device__ int warpPrefixSum(int value)
    return value;
}

__device__ int blockPrefixSum(int value)
__device__ int blockInclusivePrefixSum(int value)
{
    static __shared__ int shared[32];
    int lane = threadIdx.x & (warpSize - 1);
    int wid = threadIdx.x / warpSize;

    int tmp = warpPrefixSum(value);
    int tmp = warpInclusivePrefixSum(value);

    if (lane == warpSize-1)
        shared[wid] = tmp;
@@ -63,7 +63,7 @@ __device__ int blockPrefixSum(int value)

    int tmp2 = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
    if (wid == 0)
        shared[lane] = warpPrefixSum(tmp2) - shared[lane];
        shared[lane] = warpInclusivePrefixSum(tmp2) - tmp2;
    __syncthreads();
    
    tmp += shared[wid];