Commit 3a5314cc authored by Xuan Thang Nguyen's avatar Xuan Thang Nguyen
Browse files

fix copy after no shared bitonic sort

parent ceab8c03
Loading
Loading
Loading
Loading
+15 −4
Original line number Diff line number Diff line
@@ -19,10 +19,9 @@ __device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src,

template <typename Value, typename Function>
__device__ void externSort(ArrayView<Value, TNL::Devices::Cuda> src,
                           ArrayView<Value, TNL::Devices::Cuda> dst,
                           const Function &Cmp)
{
    bitonicSort_Block(src, dst, Cmp);
    bitonicSort_Block(src, Cmp);
}

//---------------------------------------------------------------
@@ -48,7 +47,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr,
        if (useShared && arr.getSize() <= memSize)
            externSort<Value, Function>(src, arr, Cmp, sharedMem);
        else
            externSort<Value, Function>(src, arr, Cmp);
        {
            externSort<Value, Function>(src, Cmp);
            //extern sort without shared memory only works in-place, need to copy into from aux
            if ((_depth & 1) != 0)
                for (int i = threadIdx.x; i < arr.getSize(); i += blockDim.x)
                    arr[i] = src[i];
        }

        return;
    }
@@ -91,7 +96,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr,
            if (useShared && size <= memSize)
                externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp, sharedMem);
            else
                externSort<Value, Function>(src.getView(begin, end), arr.getView(begin, end), Cmp);
            {
                externSort<Value, Function>(src.getView(begin, end), Cmp);
                //extern sort without shared memory only works in-place, need to copy into from aux
                if ((depth & 1) != 0)
                    for (int i = threadIdx.x; i < src.getSize(); i += blockDim.x)
                        arr[begin + i] = src[i];
            }
            __syncthreads();
            continue;
        }