Commit d3c2dc6c authored by Xuan Thang Nguyen's avatar Xuan Thang Nguyen
Browse files

fix fetch and add tests for bitonic

parent d4e46a7e
Loading
Loading
Loading
Loading
+6 −6
Original line number Original line Diff line number Diff line
@@ -296,8 +296,8 @@ void bitonicSort(std::vector<Value> & vec)
//---------------------------------------------
//---------------------------------------------


template <typename FETCH, typename CMP,  typename SWAP>
template <typename FETCH, typename CMP,  typename SWAP>
__global__ void bitonicMergeGlobal(int size, const FETCH & Fetch, 
__global__ void bitonicMergeGlobal(int size, FETCH Fetch, 
                                 const CMP & Cmp, const SWAP & Swap,
                                 const CMP & Cmp, SWAP Swap,
                                 int monotonicSeqLen, int len, int partsInSeq)
                                 int monotonicSeqLen, int len, int partsInSeq)
{
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -316,14 +316,14 @@ __global__ void bitonicMergeGlobal(int size, const FETCH & Fetch,
    if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase
    if ((monotonicSeqIdx + 1) * monotonicSeqLen >= size) //special case for part with no "partner" to be merged with in next phase
        ascending = true;
        ascending = true;


    if( (ascending == Cmp(Fetch(e), Fetch(s))))
    if( ascending == Cmp(Fetch(e), Fetch(s)))
        Swap(s, e);
        Swap(s, e);
}
}






template <typename FETCH, typename CMP,  typename SWAP>
template <typename FETCH, typename CMP,  typename SWAP>
void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const SWAP & Swap)
void bitonicSort(int begin, int end, FETCH Fetch, const CMP& Cmp, SWAP Swap)
{
{
    int size = end - begin;
    int size = end - begin;
    int paddedSize = closestPow2(size);
    int paddedSize = closestPow2(size);
@@ -341,9 +341,9 @@ void bitonicSort(int begin, int end, const FETCH & Fetch, const CMP& Cmp, const
        };
        };
        
        
    auto swapWithOffset = 
    auto swapWithOffset = 
        [=] __cuda_callable__(int i, int j)
        [=] __cuda_callable__(int i, int j) mutable
        {
        {
            return Swap(i+begin, i+begin);
            Swap(i+begin, j+begin);
        };
        };


    for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2)
    for (int monotonicSeqLen = 2; monotonicSeqLen <= paddedSize; monotonicSeqLen *= 2)
+49 −9
Original line number Original line Diff line number Diff line
@@ -239,15 +239,14 @@ TEST(sortRange, middleMultiBlock)
    ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1);
    ASSERT_TRUE(arr[e + (std::rand() % (size - e))] == -1);
    ASSERT_TRUE(arr.back() == -1); 
    ASSERT_TRUE(arr.back() == -1); 
}
}
/*
void fetchAndSwapSorter(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view)
{
    
    //auto Fetch = [=]__cuda_callable__(int i){return view[i];};
    //auto Cmp = [=]__cuda_callable__(const int & a, const int & b){return a < b;};
    //auto Swap = [=] __device__ (int i, int j){TNL::swap(view[i], view[j]);};
    //bitonicSort(0, view.getSize(), Fetch, Cmp, Swap);


// Sorts all elements of `view` in ascending order (per the `a < b` comparator)
// on the GPU, using the Fetch/Cmp/Swap customization-point variant of bitonicSort.
template<typename TYPE>
void fetchAndSwapSorter(TNL::Containers::ArrayView<TYPE, TNL::Devices::Cuda> view)
{
    // Fetch: read element i from the device view (returned by value).
    auto Fetch = [=]__cuda_callable__(int i){return view[i];};
    // Cmp: strict less-than => ascending order.
    auto Cmp = [=]__cuda_callable__(const TYPE & a, const TYPE & b){return a < b;};
    // Swap: exchange elements i and j in place. `mutable` lets the by-value
    // captured view copy be used through its non-const interface inside the
    // device lambda.
    auto Swap = [=] __cuda_callable__ (int i, int j) mutable {TNL::swap(view[i], view[j]);};
    // Sort the whole range [0, getSize()). The functors are taken by value
    // by bitonicSort (see the signature change in this diff) so they can be
    // forwarded to a __global__ kernel.
    bitonicSort(0, view.getSize(), Fetch, Cmp, Swap);
}
} // NOTE(review): duplicated closing brace is an artifact of the rendered diff, not part of the new code


TEST(fetchAndSwap, oneBlockSort)
TEST(fetchAndSwap, oneBlockSort)
@@ -271,7 +270,48 @@ TEST(fetchAndSwap, oneBlockSort)
    }
    }
    while (std::next_permutation(orig.begin(), orig.end()));
    while (std::next_permutation(orig.begin(), orig.end()));
}
}
*/

// Exhaustively exercises fetchAndSwapSorter with a non-int element type:
// every permutation of {0, 1, 2, 3, 4} (as doubles) is uploaded to the GPU,
// sorted, and verified to come back sorted.
TEST(fetchAndSwap, typeDouble)
{
    constexpr int permSize = 5;
    std::vector<double> permutation(permSize);
    std::iota(permutation.begin(), permutation.end(), 0);

    do
    {
        // Fresh device copy for each permutation, sorted in place.
        TNL::Containers::Array<double, TNL::Devices::Cuda> deviceArr(permutation);
        auto deviceView = deviceArr.getView();
        fetchAndSwapSorter(deviceView);
        ASSERT_TRUE(is_sorted(deviceView)) << "result " << deviceView << std::endl;
    } while (std::next_permutation(permutation.begin(), permutation.end()));
}

// Sorts only the half-open sub-range [from, to) of `view` ascending, using the
// Fetch/Cmp/Swap customization points of bitonicSort.
void fetchAndSwap_sortMiddle(TNL::Containers::ArrayView<int, TNL::Devices::Cuda> view, int from, int to)
{
    // Element accessor: returns view[idx] by value.
    auto fetch = [=] __cuda_callable__ (int idx) { return view[idx]; };
    // Strict less-than comparator => ascending order.
    auto less = [=] __cuda_callable__ (const int & lhs, const int & rhs) { return lhs < rhs; };
    // In-place exchange; `mutable` so the captured view copy can be written through.
    auto exchange = [=] __cuda_callable__ (int a, int b) mutable { TNL::swap(view[a], view[b]); };

    bitonicSort(from, to, fetch, less, exchange);
}

// Sorts only the sub-range [from, to) of a device array and verifies that
// (a) the sub-range comes back sorted and (b) every element outside it is
// left untouched.
TEST(fetchAndSwap, sortMiddle)
{
    std::vector<int> orig{5, 9, 4, 54, 21, 6, 7, 9, 0, 9, 42, 4};
    TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig);
    auto view = cudaArr.getView();
    const int from = 3, to = 8; // half-open range [from, to) to be sorted

    fetchAndSwap_sortMiddle(view, from, to);
    // Reuse `from`/`to` instead of repeating the literals 3/8, so the checked
    // bounds cannot drift apart from the bounds that were actually sorted.
    ASSERT_TRUE(is_sorted(view.getView(from, to))) << "result " << view << std::endl;

    // Elements outside [from, to) must be unchanged. Use a signed index to
    // avoid the signed/unsigned comparison against `from`/`to`.
    for (int i = 0; i < static_cast<int>(orig.size()); i++)
    {
        if (i < from || i >= to)
            ASSERT_TRUE(view.getElement(i) == orig[i]);
    }
}



//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------