diff --git a/src/bitonicSort/bitonicSort.h b/src/bitonicSort/bitonicSort.h
index a066ef5d8912a366a033ea165bce7878d7c09946..9cff095db38f4b1809776fb674c7ab53463322c4 100644
--- a/src/bitonicSort/bitonicSort.h
+++ b/src/bitonicSort/bitonicSort.h
@@ -178,6 +178,60 @@ void bitonicSort_Block(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> src
             dst[copy2] = sharedMem[copy2];
     }
 }
+
+/**
+ * Block-wide bitonic sort that works directly on the elements of src
+ * (no shared-memory staging); the result is left in src.
+ *
+ * Every thread of the block has to call this function, because it executes
+ * __syncthreads() after each compare-and-swap step. Pairs are distributed over
+ * threads by threadIdx.x with a stride of blockDim.x, so the view may hold more
+ * than 2 * blockDim.x elements.
+ *
+ * @param src view whose elements are reordered in place
+ * @param dst not accessed by this function.
+ *            NOTE(review): confirm that no caller expects the result in dst.
+ * @param Cmp comparator forwarded unchanged to cmpSwap
+ */
+template <typename Value, typename Function>
+__device__
+void bitonicSort_Block(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> src,
+                       TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> dst,
+                       const Function & Cmp)
+{
+    const int size = src.getSize();
+    //assumes closestPow2 rounds size up to a power of two -- TODO confirm against its definition
+    const int paddedSize = closestPow2(size);
+    const int pairCnt = paddedSize / 2; //pairs per compare-and-swap step, virtual padding included
+
+    for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2)
+    {
+        for (int len = monotonicSeqLen; len > 1; len /= 2)
+        {
+            //pairs of one step never share an element, so one thread can safely process several of them
+            for (int i = threadIdx.x; i < pairCnt; i += blockDim.x)
+            {
+                //calculates which 2 indexes will be compared and swapped
+                int part = i / (len / 2);
+                int s = part * len + (i & ((len / 2) - 1));
+                int e = s + len / 2;
+
+                if (e >= size) //touching virtual padding; s and e only grow with i, so later pairs do too
+                    break;
+
+                //calculate the direction of swapping
+                int monotonicSeqIdx = i / (monotonicSeqLen / 2);
+                bool ascending = (monotonicSeqIdx & 1) != 0;
+                if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for parts with no "partner"
+                    ascending = true;
+
+                cmpSwap(src[s], src[e], ascending, Cmp);
+            }
+            __syncthreads(); //kept outside the per-thread loop so that every thread of the block reaches it
+        }
+    }
+}
+
 /**
  * very similar to bitonicMergeSharedMemory
  * does bitonicMergeSharedMemory but afterwards increases monotoncSeqLen