Loading src/quicksort/quicksort_1Block.cuh +15 −4 Original line number Original line Diff line number Diff line Loading @@ -19,10 +19,9 @@ __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src, template <typename Value, typename Function> template <typename Value, typename Function> __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src, __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src, ArrayView<Value, TNL::Devices::Cuda> dst, const Function &Cmp) const Function &Cmp) { { bitonicSort_Block(src, dst, Cmp); bitonicSort_Block(src, Cmp); } } //--------------------------------------------------------------- //--------------------------------------------------------------- Loading @@ -48,7 +47,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, if (useShared && arr.getSize() <= memSize) if (useShared && arr.getSize() <= memSize) externSort<Value, Function>(src, arr, Cmp, sharedMem); externSort<Value, Function>(src, arr, Cmp, sharedMem); else else externSort<Value, Function>(src, arr, Cmp); { externSort<Value, Function>(src, Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((_depth & 1) != 0) for (int i = threadIdx.x; i < arr.getSize(); i += blockDim.x) arr[i] = src[i]; } return; return; } } Loading Loading @@ -91,7 +96,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, if (useShared && size <= memSize) if (useShared && size <= memSize) externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem); externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem); else else externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp); { externSort<Value, Function>(src.getView(begin, end), Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((depth & 1) != 0) for (int i = threadIdx.x; i < src.getSize(); i += blockDim.x) arr[begin + i] = src[i]; } __syncthreads(); __syncthreads(); continue; continue; } } Loading Loading
src/quicksort/quicksort_1Block.cuh +15 −4 Original line number Original line Diff line number Diff line Loading @@ -19,10 +19,9 @@ __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src, template <typename Value, typename Function> template <typename Value, typename Function> __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src, __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src, ArrayView<Value, TNL::Devices::Cuda> dst, const Function &Cmp) const Function &Cmp) { { bitonicSort_Block(src, dst, Cmp); bitonicSort_Block(src, Cmp); } } //--------------------------------------------------------------- //--------------------------------------------------------------- Loading @@ -48,7 +47,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, if (useShared && arr.getSize() <= memSize) if (useShared && arr.getSize() <= memSize) externSort<Value, Function>(src, arr, Cmp, sharedMem); externSort<Value, Function>(src, arr, Cmp, sharedMem); else else externSort<Value, Function>(src, arr, Cmp); { externSort<Value, Function>(src, Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((_depth & 1) != 0) for (int i = threadIdx.x; i < arr.getSize(); i += blockDim.x) arr[i] = src[i]; } return; return; } } Loading Loading @@ -91,7 +96,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, if (useShared && size <= memSize) if (useShared && size <= memSize) externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem); externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem); else else externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp); { externSort<Value, Function>(src.getView(begin, end), Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((depth & 1) != 0) for (int i = threadIdx.x; i < src.getSize(); i += blockDim.x) arr[begin + i] = src[i]; } __syncthreads(); __syncthreads(); continue; continue; } } Loading