Loading src/bitonicSort/bitonicSort.h +6 −6 Original line number Original line Diff line number Diff line Loading @@ -296,8 +296,8 @@ void bitonicSort(std::vector<Value> & vec) //--------------------------------------------- //--------------------------------------------- template <typename FETCH, typename CMP, typename SWAP> template <typename FETCH, typename CMP, typename SWAP> __global__ void bitonicMergeGlobal(int size, const FETCH & Fetch, __global__ void bitonicMergeGlobal(int size, FETCH Fetch, const CMP & Cmp, const SWAP & Swap, const CMP & Cmp, SWAP Swap, int monotonicSeqLen, int len, int partsInSeq) int monotonicSeqLen, int len, int partsInSeq) { { int i = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.x * blockDim.x + threadIdx.x; Loading @@ -316,14 +316,14 @@ __global__ void bitonicMergeGlobal(int size, const FETCH & Fetch, if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase ascending = true; ascending = true; if( (ascending == Cmp(Fetch(e), Fetch(s)))) if( ascending == Cmp(Fetch(e), Fetch(s))) Swap(s, e); Swap(s, e); } } template <typename FETCH, typename CMP, typename SWAP> template <typename FETCH, typename CMP, typename SWAP> void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const SWAP & Swap) void bitonicSort(int begin, int end, FETCH Fetch, const CMP& Cmp, SWAP Swap) { { int size = end - begin; int size = end - begin; int paddedSize = closestPow2(size); int paddedSize = closestPow2(size); Loading @@ -341,9 +341,9 @@ void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const }; }; auto swapWithOffset = auto swapWithOffset = [=] __cuda_callable__(int i, int j) [=] __cuda_callable__(int i, int j) mutable { { return Swap(i+begin, i+begin); Swap(i+begin, j+begin); }; }; for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2) for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2) Loading tests/bitonic_tests/unitTests.cu +49 −9 Original line number Original line Diff line number Diff line Loading @@ -239,15 +239,14 @@ TEST(sortRange, middleMultiBlock) ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1); ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1); ASSERT_TRUE(arr.back() == -1); ASSERT_TRUE(arr.back() == -1); } } /* void fetchAndSwapSorter(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view) { //auto Fetch = [=]__cuda_callable__(int i){return view[i];}; //auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;}; //auto Swap = [=] __device__ (int i, int j){TNL::swap(view[i], view[j]);}; //bitonicSort(0, view.getSize(), Fetch, Cmp, Swap); template<typename TYPE> void fetchAndSwapSorter(TNL::Containers::ArrayView<TYPE, TNL::Devices::Cuda> view) { auto Fetch = [=]__cuda_callable__(int i){return view[i];}; auto Cmp = [=]__cuda_callable__(const TYPE & a, const TYPE & b){return a < b;}; auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);}; bitonicSort(0, view.getSize(), Fetch, Cmp, Swap); } } TEST(fetchAndSwap, oneBlockSort) TEST(fetchAndSwap, oneBlockSort) Loading @@ -271,7 +270,48 @@ TEST(fetchAndSwap, oneBlockSort) } } while (std::next_permutation(orig.begin(), orig.end())); while (std::next_permutation(orig.begin(), orig.end())); } } */ TEST(fetchAndSwap, typeDouble) { int size = 5; std::vector<double> orig(size); std::iota(orig.begin(), orig.end(), 0); do { TNL::Containers::Array<double, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); fetchAndSwapSorter(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } while (std::next_permutation(orig.begin(), orig.end())); } void fetchAndSwap_sortMiddle(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view, int from, int to) { auto Fetch = [=]__cuda_callable__(int i){return view[i];}; auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;}; auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);}; bitonicSort(from, to, Fetch, Cmp, Swap); } TEST(fetchAndSwap, sortMiddle) { std::vector<int> orig{5, 9, 4, 54, 21, 6, 7, 9, 0, 9, 42, 4}; TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); int from = 3, to = 8; fetchAndSwap_sortMiddle(view, from, to); ASSERT_TRUE(is_sorted(view.getView(3, 8))) << "result " << view << std::endl; for(size_t i = 0; i < orig.size(); i++) { if(i < from || i >= to) ASSERT_TRUE(view.getElement(i) == orig[i]); } } //---------------------------------------------------------------------------------- //---------------------------------------------------------------------------------- Loading Loading
src/bitonicSort/bitonicSort.h +6 −6 Original line number Original line Diff line number Diff line Loading @@ -296,8 +296,8 @@ void bitonicSort(std::vector<Value> & vec) //--------------------------------------------- //--------------------------------------------- template <typename FETCH, typename CMP, typename SWAP> template <typename FETCH, typename CMP, typename SWAP> __global__ void bitonicMergeGlobal(int size, const FETCH & Fetch, __global__ void bitonicMergeGlobal(int size, FETCH Fetch, const CMP & Cmp, const SWAP & Swap, const CMP & Cmp, SWAP Swap, int monotonicSeqLen, int len, int partsInSeq) int monotonicSeqLen, int len, int partsInSeq) { { int i = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.x * blockDim.x + threadIdx.x; Loading @@ -316,14 +316,14 @@ __global__ void bitonicMergeGlobal(int size, const FETCH & Fetch, if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase ascending = true; ascending = true; if( (ascending == Cmp(Fetch(e), Fetch(s)))) if( ascending == Cmp(Fetch(e), Fetch(s))) Swap(s, e); Swap(s, e); } } template <typename FETCH, typename CMP, typename SWAP> template <typename FETCH, typename CMP, typename SWAP> void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const SWAP & Swap) void bitonicSort(int begin, int end, FETCH Fetch, const CMP& Cmp, SWAP Swap) { { int size = end - begin; int size = end - begin; int paddedSize = closestPow2(size); int paddedSize = closestPow2(size); Loading @@ -341,9 +341,9 @@ void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const }; }; auto swapWithOffset = auto swapWithOffset = [=] __cuda_callable__(int i, int j) [=] __cuda_callable__(int i, int j) mutable { { return Swap(i+begin, i+begin); Swap(i+begin, j+begin); }; }; for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2) for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2) Loading
tests/bitonic_tests/unitTests.cu +49 −9 Original line number Original line Diff line number Diff line Loading @@ -239,15 +239,14 @@ TEST(sortRange, middleMultiBlock) ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1); ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1); ASSERT_TRUE(arr.back() == -1); ASSERT_TRUE(arr.back() == -1); } } /* void fetchAndSwapSorter(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view) { //auto Fetch = [=]__cuda_callable__(int i){return view[i];}; //auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;}; //auto Swap = [=] __device__ (int i, int j){TNL::swap(view[i], view[j]);}; //bitonicSort(0, view.getSize(), Fetch, Cmp, Swap); template<typename TYPE> void fetchAndSwapSorter(TNL::Containers::ArrayView<TYPE, TNL::Devices::Cuda> view) { auto Fetch = [=]__cuda_callable__(int i){return view[i];}; auto Cmp = [=]__cuda_callable__(const TYPE & a, const TYPE & b){return a < b;}; auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);}; bitonicSort(0, view.getSize(), Fetch, Cmp, Swap); } } TEST(fetchAndSwap, oneBlockSort) TEST(fetchAndSwap, oneBlockSort) Loading @@ -271,7 +270,48 @@ TEST(fetchAndSwap, oneBlockSort) } } while (std::next_permutation(orig.begin(), orig.end())); while (std::next_permutation(orig.begin(), orig.end())); } } */ TEST(fetchAndSwap, typeDouble) { int size = 5; std::vector<double> orig(size); std::iota(orig.begin(), orig.end(), 0); do { TNL::Containers::Array<double, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); fetchAndSwapSorter(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } while (std::next_permutation(orig.begin(), orig.end())); } void fetchAndSwap_sortMiddle(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view, int from, int to) { auto Fetch = [=]__cuda_callable__(int i){return view[i];}; auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;}; auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);}; bitonicSort(from, to, Fetch, Cmp, Swap); } TEST(fetchAndSwap, sortMiddle) { std::vector<int> orig{5, 9, 4, 54, 21, 6, 7, 9, 0, 9, 42, 4}; TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); int from = 3, to = 8; fetchAndSwap_sortMiddle(view, from, to); ASSERT_TRUE(is_sorted(view.getView(3, 8))) << "result " << view << std::endl; for(size_t i = 0; i < orig.size(); i++) { if(i < from || i >= to) ASSERT_TRUE(view.getElement(i) == orig[i]); } } //---------------------------------------------------------------------------------- //---------------------------------------------------------------------------------- Loading