From 6b923f49cd8ae9059d12cd8c6ef0bfe84152b0e7 Mon Sep 17 00:00:00 2001
From: Xuan Thang Nguyen <nguyexu2@fit.cvut.cz>
Date: Sat, 6 Mar 2021 02:40:50 +0100
Subject: [PATCH] Unroll warpInclusivePrefixSum via template recursion

---
 quicksort/reduction.cuh | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/quicksort/reduction.cuh b/quicksort/reduction.cuh
index 9c528c1..234871c 100644
--- a/quicksort/reduction.cuh
+++ b/quicksort/reduction.cuh
@@ -36,37 +36,28 @@ __device__ int blockReduceSum(int val)
     return shared[0];
 }
 
+
+template<int it>
 __device__ int warpInclusivePrefixSum(int value)
 {
-    int laneId = threadIdx.x & 0x1f;
-    for (int i = 1; i*2 <= warpSize; i *= 2)
+    if(it*2 <= 32)
     {
+        int i = it;
         int n = __shfl_up_sync(0xffffffff, value, i);
+        int laneId = threadIdx.x & 0x1f;
         if ((laneId & (warpSize - 1)) >= i)
             value += n;
+        return warpInclusivePrefixSum<it*2 >= 32? 32 : it*2>(value);
+        
     }
 
     return value;
 }
 
-/*
-template<int it = 32>
 __device__ int warpInclusivePrefixSum(int value)
 {
-    if(it >= 2)
-    {
-        int i = it == 0? 32 : 32/it;
-        int n = __shfl_up_sync(0xffffffff, value, i);
-        int laneId = threadIdx.x & 0x1f;
-        if ((laneId & (warpSize - 1)) >= i)
-            value += n;
-        return warpInclusivePrefixSum<it/2>(value);
-        
-    }
-
-    return value;
+    return warpInclusivePrefixSum<1>(value);
 }
-*/
 
 __device__ int blockInclusivePrefixSum(int value)
 {
-- 
GitLab