Loading src/quicksort/quicksort_1Block.cuh +16 −16 Original line number Diff line number Diff line Loading @@ -31,27 +31,27 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], int stackDepth[], int &stackTop, int begin, int pivotBegin, int pivotEnd, int end, int depth); int iteration); //--------------------------------------------------------------- template <typename Value, typename CMP, int stackSize, bool useShared> __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, ArrayView<Value, TNL::Devices::Cuda> aux, const CMP &Cmp, int _depth, const CMP &Cmp, int _iteration, Value *sharedMem, int memSize, int maxBitonicSize) { if (arr.getSize() <= maxBitonicSize) { auto &src = (_depth & 1) == 0 ? arr : aux; auto &src = (_iteration & 1) == 0 ? arr : aux; if (useShared && arr.getSize() <= memSize) externSort<Value, CMP>(src, arr, Cmp, sharedMem); else { externSort<Value, CMP>(src, Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((_depth & 1) != 0) if ((_iteration & 1) != 0) for (int i = threadIdx.x; i < arr.getSize(); i += blockDim.x) arr[i] = src[i]; } Loading @@ -61,7 +61,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, static __shared__ int stackTop; static __shared__ int stackArrBegin[stackSize], stackArrEnd[stackSize], stackDepth[stackSize]; static __shared__ int begin, end, depth; static __shared__ int begin, end, iteration; static __shared__ int pivotBegin, pivotEnd; Value *piv = sharedMem; sharedMem += 1; Loading @@ -71,7 +71,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, stackTop = 0; stackArrBegin[stackTop] = 0; stackArrEnd[stackTop] = arr.getSize(); stackDepth[stackTop] = _depth; stackDepth[stackTop] = _iteration; stackTop++; } __syncthreads(); Loading @@ -83,13 +83,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, { begin = stackArrBegin[stackTop - 1]; end = stackArrEnd[stackTop - 1]; depth = stackDepth[stackTop - 1]; iteration = stackDepth[stackTop - 1]; stackTop--; } __syncthreads(); int size = end - begin; auto &src = (depth & 1) == 0 ? arr : aux; auto &src = (iteration & 1) == 0 ? arr : aux; //small enough for for bitonic if (size <= maxBitonicSize) Loading @@ -100,7 +100,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, { externSort<Value, CMP>(src.getView(begin, end), Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((depth & 1) != 0) if ((iteration & 1) != 0) for (int i = threadIdx.x; i < src.getSize(); i += blockDim.x) arr[begin + i] = src[i]; } Loading Loading @@ -134,7 +134,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, * move elements, either use shared mem for coalesced access or without shared mem if data is too big * */ auto &dst = (depth & 1) == 0 ? aux : arr; auto &dst = (iteration & 1) == 0 ? aux : arr; if (useShared && size <= memSize) { Loading Loading @@ -172,7 +172,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, stackPush<stackSize>(stackArrBegin, stackArrEnd, stackDepth, stackTop, begin, begin + pivotBegin, begin + pivotEnd, end, depth); iteration); } __syncthreads(); //sync to update stackTop } //ends while loop Loading @@ -185,7 +185,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], int stackDepth[], int &stackTop, int begin, int pivotBegin, int pivotEnd, int end, int depth) int iteration) { int sizeL = pivotBegin - begin, sizeR = end - pivotEnd; Loading @@ -197,7 +197,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], { stackArrBegin[stackTop] = begin; stackArrEnd[stackTop] = pivotBegin; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } Loading @@ -207,7 +207,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], stackArrBegin[stackTop] = pivotEnd; stackArrEnd[stackTop] = end; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } } Loading @@ -217,7 +217,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], { stackArrBegin[stackTop] = pivotEnd; stackArrEnd[stackTop] = end; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } Loading @@ -227,7 +227,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], stackArrBegin[stackTop] = begin; stackArrEnd[stackTop] = pivotBegin; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } } Loading src/quicksort/quicksort_kernel.cuh +15 −15 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ using namespace TNL::Containers; //----------------------------------------------------------- __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPhase, __device__ void writeNewTask(int begin, int end, int iteration, int maxElemFor2ndPhase, ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt, ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt); Loading Loading @@ -67,8 +67,8 @@ __global__ void cudaQuickSort1stPhase(ArrayView<Value, Devices::Cuda> arr, Array Value *sharedMem = piv + 1; TASK &myTask = tasks[taskMapping[blockIdx.x]]; auto &src = (myTask.depth & 1) == 0 ? arr : aux; auto &dst = (myTask.depth & 1) == 0 ? aux : arr; auto &src = (myTask.iteration & 1) == 0 ? arr : aux; auto &dst = (myTask.iteration & 1) == 0 ? aux : arr; if (threadIdx.x == 0) *piv = src[myTask.pivotIdx]; Loading @@ -95,7 +95,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va TASK &myTask = tasks[blockIdx.x]; if (threadIdx.x == 0) *piv = (myTask.depth & 1) == 0 ? arr[myTask.pivotIdx] : aux[myTask.pivotIdx]; *piv = (myTask.iteration & 1) == 0 ? arr[myTask.pivotIdx] : aux[myTask.pivotIdx]; __syncthreads(); Value &pivot = *piv; Loading @@ -117,7 +117,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va if (leftEnd - leftBegin > 0) { writeNewTask(leftBegin, leftEnd, myTask.depth, writeNewTask(leftBegin, leftEnd, myTask.iteration, maxElemFor2ndPhase, newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt); Loading @@ -126,7 +126,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va if (rightEnd - rightBegin > 0) { writeNewTask(rightBegin, rightEnd, myTask.depth, maxElemFor2ndPhase, myTask.iteration, maxElemFor2ndPhase, newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt); } Loading @@ -134,7 +134,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va //----------------------------------------------------------- __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPhase, __device__ void writeNewTask(int begin, int end, int iteration, int maxElemFor2ndPhase, ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt, ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt) { Loading @@ -152,13 +152,13 @@ __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPha { int idx = atomicAdd(secondPhaseTasksCnt, 1); if (idx < secondPhaseTasks.getSize()) secondPhaseTasks[idx] = TASK(begin, end, depth + 1); secondPhaseTasks[idx] = TASK(begin, end, iteration + 1); else { //printf("ran out of memory, trying backup\n"); int idx = atomicAdd(newTasksCnt, 1); if (idx < newTasks.getSize()) newTasks[idx] = TASK(begin, end, depth + 1); newTasks[idx] = TASK(begin, end, iteration + 1); else printf("ran out of memory for second phase task, there isnt even space in newTask list\nPart of array may stay unsorted!!!\n"); } Loading @@ -167,13 +167,13 @@ __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPha { int idx = atomicAdd(newTasksCnt, 1); if (idx < newTasks.getSize()) newTasks[idx] = TASK(begin, end, depth + 1); newTasks[idx] = TASK(begin, end, iteration + 1); else { //printf("ran out of memory, trying backup\n"); int idx = atomicAdd(secondPhaseTasksCnt, 1); if (idx < secondPhaseTasks.getSize()) secondPhaseTasks[idx] = TASK(begin, end, depth + 1); secondPhaseTasks[idx] = TASK(begin, end, iteration + 1); else printf("ran out of memory for newtask, there isnt even space in second phase task list\nPart of array may stay unsorted!!!\n"); } Loading Loading @@ -203,11 +203,11 @@ __global__ void cudaQuickSort2ndPhase(ArrayView<Value, Devices::Cuda> arr, Array if (elemInShared == 0) { singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.depth, sharedMem, 0, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.iteration, sharedMem, 0, maxBitonicSize); } else { singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.depth, sharedMem, elemInShared, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.iteration, sharedMem, elemInShared, maxBitonicSize); } } Loading Loading @@ -238,11 +238,11 @@ __global__ void cudaQuickSort2ndPhase(ArrayView<Value, Devices::Cuda> arr, Array if (elemInShared <= 0) { singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.depth, sharedMem, 0, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.iteration, sharedMem, 0, maxBitonicSize); } else { singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.depth, sharedMem, elemInShared, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.iteration, sharedMem, elemInShared, maxBitonicSize); } } Loading src/quicksort/task.h +4 −4 Original line number Diff line number Diff line Loading @@ -7,15 +7,15 @@ struct TASK //----------------------------------------------- //helper variables for blocks working on this task int depth; int iteration; int pivotIdx; int dstBegin, dstEnd; int firstBlock, blockCount;//for workers read only values __cuda_callable__ TASK(int begin, int end, int depth) TASK(int begin, int end, int iteration) : partitionBegin(begin), partitionEnd(end), depth(depth), pivotIdx(-1), iteration(iteration), pivotIdx(-1), dstBegin(-151561), dstEnd(-151561), firstBlock(-100), blockCount(-100) {} Loading @@ -42,7 +42,7 @@ std::ostream& operator<<(std::ostream & out, const TASK & task) { out << "[ "; out << task.partitionBegin << " - " << task.partitionEnd; out << " | " << "depth: " << task.depth; out << " | " << "iteration: " << task.iteration; out << " | " << "pivotIdx: " << task.pivotIdx; return out << " ] "; } No newline at end of file Loading
src/quicksort/quicksort_1Block.cuh +16 −16 Original line number Diff line number Diff line Loading @@ -31,27 +31,27 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], int stackDepth[], int &stackTop, int begin, int pivotBegin, int pivotEnd, int end, int depth); int iteration); //--------------------------------------------------------------- template <typename Value, typename CMP, int stackSize, bool useShared> __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, ArrayView<Value, TNL::Devices::Cuda> aux, const CMP &Cmp, int _depth, const CMP &Cmp, int _iteration, Value *sharedMem, int memSize, int maxBitonicSize) { if (arr.getSize() <= maxBitonicSize) { auto &src = (_depth & 1) == 0 ? arr : aux; auto &src = (_iteration & 1) == 0 ? arr : aux; if (useShared && arr.getSize() <= memSize) externSort<Value, CMP>(src, arr, Cmp, sharedMem); else { externSort<Value, CMP>(src, Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((_depth & 1) != 0) if ((_iteration & 1) != 0) for (int i = threadIdx.x; i < arr.getSize(); i += blockDim.x) arr[i] = src[i]; } Loading @@ -61,7 +61,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, static __shared__ int stackTop; static __shared__ int stackArrBegin[stackSize], stackArrEnd[stackSize], stackDepth[stackSize]; static __shared__ int begin, end, depth; static __shared__ int begin, end, iteration; static __shared__ int pivotBegin, pivotEnd; Value *piv = sharedMem; sharedMem += 1; Loading @@ -71,7 +71,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, stackTop = 0; stackArrBegin[stackTop] = 0; stackArrEnd[stackTop] = arr.getSize(); stackDepth[stackTop] = _depth; stackDepth[stackTop] = _iteration; stackTop++; } __syncthreads(); Loading @@ -83,13 +83,13 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, { begin = stackArrBegin[stackTop - 1]; end = stackArrEnd[stackTop - 1]; depth = stackDepth[stackTop - 1]; iteration = stackDepth[stackTop - 1]; stackTop--; } __syncthreads(); int size = end - begin; auto &src = (depth & 1) == 0 ? arr : aux; auto &src = (iteration & 1) == 0 ? arr : aux; //small enough for for bitonic if (size <= maxBitonicSize) Loading @@ -100,7 +100,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, { externSort<Value, CMP>(src.getView(begin, end), Cmp); //extern sort without shared memory only works in-place, need to copy into from aux if ((depth & 1) != 0) if ((iteration & 1) != 0) for (int i = threadIdx.x; i < src.getSize(); i += blockDim.x) arr[begin + i] = src[i]; } Loading Loading @@ -134,7 +134,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, * move elements, either use shared mem for coalesced access or without shared mem if data is too big * */ auto &dst = (depth & 1) == 0 ? aux : arr; auto &dst = (iteration & 1) == 0 ? aux : arr; if (useShared && size <= memSize) { Loading Loading @@ -172,7 +172,7 @@ __device__ void singleBlockQuickSort(ArrayView<Value, TNL::Devices::Cuda> arr, stackPush<stackSize>(stackArrBegin, stackArrEnd, stackDepth, stackTop, begin, begin + pivotBegin, begin + pivotEnd, end, depth); iteration); } __syncthreads(); //sync to update stackTop } //ends while loop Loading @@ -185,7 +185,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], int stackDepth[], int &stackTop, int begin, int pivotBegin, int pivotEnd, int end, int depth) int iteration) { int sizeL = pivotBegin - begin, sizeR = end - pivotEnd; Loading @@ -197,7 +197,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], { stackArrBegin[stackTop] = begin; stackArrEnd[stackTop] = pivotBegin; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } Loading @@ -207,7 +207,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], stackArrBegin[stackTop] = pivotEnd; stackArrEnd[stackTop] = end; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } } Loading @@ -217,7 +217,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], { stackArrBegin[stackTop] = pivotEnd; stackArrEnd[stackTop] = end; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } Loading @@ -227,7 +227,7 @@ __device__ void stackPush(int stackArrBegin[], int stackArrEnd[], stackArrBegin[stackTop] = begin; stackArrEnd[stackTop] = pivotBegin; stackDepth[stackTop] = depth + 1; stackDepth[stackTop] = iteration + 1; stackTop++; } } Loading
src/quicksort/quicksort_kernel.cuh +15 −15 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ using namespace TNL::Containers; //----------------------------------------------------------- __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPhase, __device__ void writeNewTask(int begin, int end, int iteration, int maxElemFor2ndPhase, ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt, ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt); Loading Loading @@ -67,8 +67,8 @@ __global__ void cudaQuickSort1stPhase(ArrayView<Value, Devices::Cuda> arr, Array Value *sharedMem = piv + 1; TASK &myTask = tasks[taskMapping[blockIdx.x]]; auto &src = (myTask.depth & 1) == 0 ? arr : aux; auto &dst = (myTask.depth & 1) == 0 ? aux : arr; auto &src = (myTask.iteration & 1) == 0 ? arr : aux; auto &dst = (myTask.iteration & 1) == 0 ? aux : arr; if (threadIdx.x == 0) *piv = src[myTask.pivotIdx]; Loading @@ -95,7 +95,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va TASK &myTask = tasks[blockIdx.x]; if (threadIdx.x == 0) *piv = (myTask.depth & 1) == 0 ? arr[myTask.pivotIdx] : aux[myTask.pivotIdx]; *piv = (myTask.iteration & 1) == 0 ? arr[myTask.pivotIdx] : aux[myTask.pivotIdx]; __syncthreads(); Value &pivot = *piv; Loading @@ -117,7 +117,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va if (leftEnd - leftBegin > 0) { writeNewTask(leftBegin, leftEnd, myTask.depth, writeNewTask(leftBegin, leftEnd, myTask.iteration, maxElemFor2ndPhase, newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt); Loading @@ -126,7 +126,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va if (rightEnd - rightBegin > 0) { writeNewTask(rightBegin, rightEnd, myTask.depth, maxElemFor2ndPhase, myTask.iteration, maxElemFor2ndPhase, newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt); } Loading @@ -134,7 +134,7 @@ __global__ void cudaWritePivot(ArrayView<Value, Devices::Cuda> arr, ArrayView<Va //----------------------------------------------------------- __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPhase, __device__ void writeNewTask(int begin, int end, int iteration, int maxElemFor2ndPhase, ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt, ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt) { Loading @@ -152,13 +152,13 @@ __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPha { int idx = atomicAdd(secondPhaseTasksCnt, 1); if (idx < secondPhaseTasks.getSize()) secondPhaseTasks[idx] = TASK(begin, end, depth + 1); secondPhaseTasks[idx] = TASK(begin, end, iteration + 1); else { //printf("ran out of memory, trying backup\n"); int idx = atomicAdd(newTasksCnt, 1); if (idx < newTasks.getSize()) newTasks[idx] = TASK(begin, end, depth + 1); newTasks[idx] = TASK(begin, end, iteration + 1); else printf("ran out of memory for second phase task, there isnt even space in newTask list\nPart of array may stay unsorted!!!\n"); } Loading @@ -167,13 +167,13 @@ __device__ void writeNewTask(int begin, int end, int depth, int maxElemFor2ndPha { int idx = atomicAdd(newTasksCnt, 1); if (idx < newTasks.getSize()) newTasks[idx] = TASK(begin, end, depth + 1); newTasks[idx] = TASK(begin, end, iteration + 1); else { //printf("ran out of memory, trying backup\n"); int idx = atomicAdd(secondPhaseTasksCnt, 1); if (idx < secondPhaseTasks.getSize()) secondPhaseTasks[idx] = TASK(begin, end, depth + 1); secondPhaseTasks[idx] = TASK(begin, end, iteration + 1); else printf("ran out of memory for newtask, there isnt even space in second phase task list\nPart of array may stay unsorted!!!\n"); } Loading Loading @@ -203,11 +203,11 @@ __global__ void cudaQuickSort2ndPhase(ArrayView<Value, Devices::Cuda> arr, Array if (elemInShared == 0) { singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.depth, sharedMem, 0, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.iteration, sharedMem, 0, maxBitonicSize); } else { singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.depth, sharedMem, elemInShared, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.iteration, sharedMem, elemInShared, maxBitonicSize); } } Loading Loading @@ -238,11 +238,11 @@ __global__ void cudaQuickSort2ndPhase(ArrayView<Value, Devices::Cuda> arr, Array if (elemInShared <= 0) { singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.depth, sharedMem, 0, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, false>(arrView, auxView, Cmp, myTask.iteration, sharedMem, 0, maxBitonicSize); } else { singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.depth, sharedMem, elemInShared, maxBitonicSize); singleBlockQuickSort<Value, CMP, stackSize, true>(arrView, auxView, Cmp, myTask.iteration, sharedMem, elemInShared, maxBitonicSize); } } Loading
src/quicksort/task.h +4 −4 Original line number Diff line number Diff line Loading @@ -7,15 +7,15 @@ struct TASK //----------------------------------------------- //helper variables for blocks working on this task int depth; int iteration; int pivotIdx; int dstBegin, dstEnd; int firstBlock, blockCount;//for workers read only values __cuda_callable__ TASK(int begin, int end, int depth) TASK(int begin, int end, int iteration) : partitionBegin(begin), partitionEnd(end), depth(depth), pivotIdx(-1), iteration(iteration), pivotIdx(-1), dstBegin(-151561), dstEnd(-151561), firstBlock(-100), blockCount(-100) {} Loading @@ -42,7 +42,7 @@ std::ostream& operator<<(std::ostream & out, const TASK & task) { out << "[ "; out << task.partitionBegin << " - " << task.partitionEnd; out << " | " << "depth: " << task.depth; out << " | " << "iteration: " << task.iteration; out << " | " << "pivotIdx: " << task.pivotIdx; return out << " ] "; } No newline at end of file