Skip to content
Snippets Groups Projects
Commit afc65716 authored by Xuan Thang Nguyen's avatar Xuan Thang Nguyen
Browse files

copy in aux and write pivot

parent dc97c6b5
No related branches found
No related tags found
No related merge requests found
......@@ -3,11 +3,13 @@
#include <TNL/Containers/Array.h>
#include "reduction.cuh"
#define deb(x) std::cout << #x << " = " << x << std::endl;
using CudaArrayView = TNL::Containers::ArrayView<int, TNL::Devices::Cuda>;
__device__ void cmpElem(CudaArrayView arr, int myBegin, int myEnd, int pivot, int &smaller, int &bigger)
{
for (int i = myBegin + threadIdx.x; i < myEnd; i += threadIdx.x)
for (int i = myBegin + threadIdx.x; i < myEnd; i += blockDim.x)
{
int data = arr[i];
if (data < pivot)
......@@ -58,6 +60,14 @@ __global__ void cudaPartition(CudaArrayView arr, int begin, int end,
int auxThreadSmallerBegin = atomicAdd(smallerStart, smaller);
int auxThreadBiggerBegin = atomicAdd(biggerStart, bigger);
copyData(arr, myBegin, myEnd, pivot, aux, auxThreadSmallerBegin, auxThreadBiggerBegin);
__syncthreads();
//inserts pivot
if (threadIdx.x * blockIdx.x == 0)
{
aux[*auxEndIdx - 1] = pivot;
*newPivotPos = *auxEndIdx - 1;
}
}
int partition(CudaArrayView arr, int begin, int end, int pivotIdx)
......@@ -80,13 +90,21 @@ int partition(CudaArrayView arr, int begin, int end, int pivotIdx)
}
//------------------------------------
TNL::Containers::Array<int, TNL::Devices::Cuda> aux(end - begin), cudaAuxBegin({0}), cudaAuxEnd({end});
TNL::Containers::Array<int, TNL::Devices::Cuda> newPivotPos;
cudaPartition<<<blocks, maxBlocks>>>(arr, begin, end,
aux, cudaAuxBegin.getData(), cudaAuxEnd.getData(),
pivotIdx, newPivotPos.getData(),
elemPerBlock);
TNL::Containers::Array<int, TNL::Devices::Cuda> aux(arr.getSize());
TNL::Algorithms::MultiDeviceMemoryOperations<TNL::Devices::Cuda, TNL::Devices::Cuda >::
copy(aux.getData(), arr.getData(), arr.getSize());
TNL::Containers::Array<int, TNL::Devices::Cuda> cudaAuxBegin({begin}), cudaAuxEnd({end}), newPivotPos(1);
//------------------------------------
cudaPartition<<<blocks, threadsPerBlock>>>(arr, begin, end,
aux, cudaAuxBegin.getData(), cudaAuxEnd.getData(),
pivotIdx, newPivotPos.getData(),
elemPerBlock);
//------------------------------------
TNL::Algorithms::MultiDeviceMemoryOperations<TNL::Devices::Cuda, TNL::Devices::Cuda >::
copy(arr.getData(), aux.getData(), aux.getSize());
return newPivotPos.getElement(0);
}
......@@ -97,7 +115,6 @@ void quicksort(CudaArrayView arr, int begin, int end)
{
if (begin >= end)
return;
int newPivotPos = partition(arr, begin, end, end - 1);
quicksort(arr, begin, newPivotPos);
quicksort(arr, newPivotPos + 1, end);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment