fix fetch and add tests for bitonic (d3c2dc6c) · Commits · TNL / GPUSort

src/bitonicSort/bitonicSort.h

+6 −6

Original line number	Original line	Diff line number	Diff line
	@@ -296,8 +296,8 @@ void bitonicSort(std::vector<Value> & vec)
	//---------------------------------------------		//---------------------------------------------

	template <typename FETCH, typename CMP, typename SWAP>		template <typename FETCH, typename CMP, typename SWAP>
	__global__ void bitonicMergeGlobal(int size, const FETCH & Fetch,		__global__ void bitonicMergeGlobal(int size, FETCH Fetch,
	const CMP & Cmp, const SWAP & Swap,		const CMP & Cmp, SWAP Swap,
	int monotonicSeqLen, int len, int partsInSeq)		int monotonicSeqLen, int len, int partsInSeq)
	{		{
	int i = blockIdx.x * blockDim.x + threadIdx.x;		int i = blockIdx.x * blockDim.x + threadIdx.x;
	@@ -316,14 +316,14 @@ __global__ void bitonicMergeGlobal(int size, const FETCH & Fetch,
	if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase		if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase
	ascending = true;		ascending = true;

	if( (ascending == Cmp(Fetch(e), Fetch(s))))		if( ascending == Cmp(Fetch(e), Fetch(s)))
	Swap(s, e);		Swap(s, e);
	}		}



	template <typename FETCH, typename CMP, typename SWAP>		template <typename FETCH, typename CMP, typename SWAP>
	void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const SWAP & Swap)		void bitonicSort(int begin, int end, FETCH Fetch, const CMP& Cmp, SWAP Swap)
	{		{
	int size = end - begin;		int size = end - begin;
	int paddedSize = closestPow2(size);		int paddedSize = closestPow2(size);
	@@ -341,9 +341,9 @@ void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const
	};		};

	auto swapWithOffset =		auto swapWithOffset =
	[=] __cuda_callable__(int i, int j)		[=] __cuda_callable__(int i, int j) mutable
	{		{
	return Swap(i+begin, i+begin);		Swap(i+begin, j+begin);
	};		};

	for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2)		for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2)

tests/bitonic_tests/unitTests.cu

+49 −9

Original line number	Original line	Diff line number	Diff line
	@@ -239,15 +239,14 @@ TEST(sortRange, middleMultiBlock)
	ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1);		ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1);
	ASSERT_TRUE(arr.back() == -1);		ASSERT_TRUE(arr.back() == -1);
	}		}
	/*
	void fetchAndSwapSorter(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view)
	{

	//auto Fetch = [=]__cuda_callable__(int i){return view[i];};
	//auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;};
	//auto Swap = [=] __device__ (int i, int j){TNL::swap(view[i], view[j]);};
	//bitonicSort(0, view.getSize(), Fetch, Cmp, Swap);

			template<typename TYPE>
			void fetchAndSwapSorter(TNL::Containers::ArrayView<TYPE, TNL::Devices::Cuda> view)
			{
			auto Fetch = [=]__cuda_callable__(int i){return view[i];};
			auto Cmp = [=]__cuda_callable__(const TYPE & a, const TYPE & b){return a < b;};
			auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);};
			bitonicSort(0, view.getSize(), Fetch, Cmp, Swap);
	}		}

	TEST(fetchAndSwap, oneBlockSort)		TEST(fetchAndSwap, oneBlockSort)
	@@ -271,7 +270,48 @@ TEST(fetchAndSwap, oneBlockSort)
	}		}
	while (std::next_permutation(orig.begin(), orig.end()));		while (std::next_permutation(orig.begin(), orig.end()));
	}		}
	*/
			// Runs fetchAndSwapSorter on every permutation of the values 0..4 stored as
			// doubles and checks that each resulting device view is reported as sorted.
			TEST(fetchAndSwap, typeDouble)
			{
			    std::vector<double> permutation(5);
			    std::iota(permutation.begin(), permutation.end(), 0);

			    // The first pass uses the ascending sequence; each later pass uses the
			    // next lexicographic permutation until next_permutation wraps around.
			    for (bool morePermutations = true; morePermutations;
			         morePermutations = std::next_permutation(permutation.begin(), permutation.end()))
			    {
			        TNL::Containers::Array<double, TNL::Devices::Cuda> deviceArr(permutation);
			        auto view = deviceArr.getView();

			        fetchAndSwapSorter(view);

			        ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
			    }
			}

			// Test helper: sorts part of `view` through the fetch/compare/swap overload of bitonicSort.
			// `from` and `to` are forwarded unchanged as the first two arguments of bitonicSort
			// (presumably the half-open index range [from, to) -- confirm against bitonicSort).
			// `view` is taken by value and captured by copy in the lambdas below.
			void fetchAndSwap_sortMiddle(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view, int from, int to)
			{
			// Fetch: returns element i of the captured view.
			auto Fetch = [=]__cuda_callable__(int i){return view[i];};
			// Cmp: strict less-than on two ints.
			auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;};
			// Swap: exchanges elements i and j of the captured view via TNL::swap.
			// NOTE(review): declared mutable, presumably so the by-copy captured view can be written through -- confirm.
			auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);};
			bitonicSort(from, to, Fetch, Cmp, Swap);
			}

			// Sorts only the sub-range [from, to) of a device array through
			// fetchAndSwap_sortMiddle and verifies that
			//   1. the sub-range itself is reported as sorted, and
			//   2. every element outside the sub-range still equals the original input.
			TEST(fetchAndSwap, sortMiddle)
			{
			    std::vector<int> orig{5, 9, 4, 54, 21, 6, 7, 9, 0, 9, 42, 4};
			    TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig);
			    auto view = cudaArr.getView();
			    int from = 3, to = 8;

			    fetchAndSwap_sortMiddle(view, from, to);

			    // Check exactly the range that was requested, not hard-coded indices,
			    // so the assertion stays correct if from/to are changed.
			    ASSERT_TRUE(is_sorted(view.getView(from, to))) << "result " << view << std::endl;

			    // Unsigned copies of the bounds avoid a signed/unsigned comparison
			    // against the size_t loop index.
			    const size_t first = static_cast<size_t>(from);
			    const size_t last = static_cast<size_t>(to);

			    for (size_t i = 0; i < orig.size(); i++)
			    {
			        if (i < first || i >= last)
			        {
			            // ASSERT_EQ reports both values when the check fails.
			            ASSERT_EQ(view.getElement(i), orig[i])
			                << "element " << i << " outside the sorted range was modified";
			        }
			    }
			}


	//----------------------------------------------------------------------------------		//----------------------------------------------------------------------------------