From 6b923f49cd8ae9059d12cd8c6ef0bfe84152b0e7 Mon Sep 17 00:00:00 2001 From: Xuan Thang Nguyen <nguyexu2@fit.cvut.cz> Date: Sat, 6 Mar 2021 02:40:50 +0100 Subject: [PATCH] template unrolling --- quicksort/reduction.cuh | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/quicksort/reduction.cuh b/quicksort/reduction.cuh index 9c528c1..234871c 100644 --- a/quicksort/reduction.cuh +++ b/quicksort/reduction.cuh @@ -36,37 +36,28 @@ __device__ int blockReduceSum(int val) return shared[0]; } + +template<int it> __device__ int warpInclusivePrefixSum(int value) { - int laneId = threadIdx.x & 0x1f; - for (int i = 1; i*2 <= warpSize; i *= 2) + if(it*2 <= 32) { + int i = it; int n = __shfl_up_sync(0xffffffff, value, i); + int laneId = threadIdx.x & 0x1f; if ((laneId & (warpSize - 1)) >= i) value += n; + return warpInclusivePrefixSum<it*2 >= 32? 32 : it*2>(value); + } return value; } -/* -template<int it = 32> __device__ int warpInclusivePrefixSum(int value) { - if(it >= 2) - { - int i = it == 0? 32 : 32/it; - int n = __shfl_up_sync(0xffffffff, value, i); - int laneId = threadIdx.x & 0x1f; - if ((laneId & (warpSize - 1)) >= i) - value += n; - return warpInclusivePrefixSum<it/2>(value); - - } - - return value; + return warpInclusivePrefixSum<1>(value); } -*/ __device__ int blockInclusivePrefixSum(int value) { -- GitLab