diff --git a/quicksort/reduction.cuh b/quicksort/reduction.cuh
index 3efb309b8ae71d1a937c33e5c5f6028a484ea223..48d3ed32dc3e41943fe3abe4e54478fd6d5d29bd 100644
--- a/quicksort/reduction.cuh
+++ b/quicksort/reduction.cuh
@@ -34,4 +34,38 @@ __device__ int blockReduceSum(int val)
     __syncthreads(); 
 
     return shared[0];
+}
+
+// Inclusive warp scan: lane k returns the sum of `value` over lanes 0..k (full-mask shuffles).
+__device__ int warpPrefixSum(int value)
+{
+    const int lane = (threadIdx.x & 0x1f) & (warpSize - 1);  // lane index, from threadIdx.x only
+    // Doubling offsets 1, 2, 4, ...: each pass folds in the running sum held `step` lanes below.
+    for (int step = 1; 2 * step <= warpSize; step *= 2)
+    {
+        const int below = __shfl_up_sync(0xffffffff, value, step);
+        value += (lane >= step) ? below : 0;  // lanes below `step` have no source lane; keep theirs
+    }
+    return value;
+}
+
+// Inclusive block-wide prefix sum of `value` in threadIdx.x order; every thread must call it (uses __syncthreads()).
+// NOTE(review): warpPrefixSum shuffles with a full mask, so blockDim.x is presumably a multiple of warpSize - confirm.
+__device__ int blockPrefixSum(int value)
+{
+    static __shared__ int warpSums[32];  // one slot per warp: first warp totals, then warp offsets
+    const int lane = threadIdx.x & (warpSize - 1);
+    const int wid = threadIdx.x / warpSize;
+
+    const int scanned = warpPrefixSum(value);  // inclusive scan within this warp
+    if (lane == warpSize - 1)
+        warpSums[wid] = scanned;  // the last lane's inclusive result is the warp's total
+    __syncthreads();
+
+    // Only slots of full warps were written above; every other lane of warp 0 contributes 0.
+    const int total = (threadIdx.x < blockDim.x / warpSize) ? warpSums[lane] : 0;
+    if (wid == 0)
+        warpSums[lane] = warpPrefixSum(total) - total;  // exclusive offset; register copy avoids reading unwritten slots
+    __syncthreads();
+    return scanned + warpSums[wid];  // add the combined total of all preceding warps
 }
\ No newline at end of file