Commit 047ab2da authored by Xuan Thang Nguyen's avatar Xuan Thang Nguyen
Browse files

Merge branch 'master' into same_dir_optimization

parents c34a5f26 48785374
Loading
Loading
Loading
Loading
+56 −96
Original line number Diff line number Diff line
#include <string>
#include <chrono>
#include <iostream>
#include <algorithm>
#include <numeric>
#include <iomanip>

#include <TNL/Containers/Array.h>

#include "../bitonicSort.h"
#include "../../util/timer.h"

using namespace TNL;
using namespace TNL::Containers;
using namespace std;

typedef Devices::Cuda Device;

template <class T>
std::ostream& operator<< (std::ostream&out, std::vector<T> &arr)
int main()
{
    for (auto x : arr)
        std::cout << x << " ";
    return out;
}

struct TIMER
    srand(2021);
    for(int pow = 10; pow <= 20; pow++)
    {
    std::string s;
    std::chrono::steady_clock::time_point begin;
    double result = 0;
    bool stopped = false;
        int size =(1<< pow);

        vector<int> vec(size);
        iota(vec.begin(), vec.end(), 0);

    TIMER(const std::string &name = "")
        : s(name), begin(std::chrono::steady_clock::now()) {}
        Array<int, Device> arr;
        vector<double> resAcc;

    double stop()
        //sorted sequence
        {
        auto end = std::chrono::steady_clock::now();
        result = (std::chrono::duration_cast<std::chrono::microseconds >(end - begin).count() / 1000.);
        stopped = true;
        return result;
    }
            arr = vec;
            auto view = arr.getView();

    void printTime()
            {
        if(!stopped)
            stop();
        std::cout << ("Measured " + s + ": ") << result << " ms" << std::endl;
                TIMER t([&](double res){resAcc.push_back(res);});
                bitonicSort(view);
            }
        }

    ~TIMER()
        //almost sorted sequence
        {
        if(!stopped)
            for(int i = 0; i < 3; i++)
            {
            stop();
            printTime();
                int s = rand() % (size - 3);
                std::swap(vec[s], vec[s + 1]);
            }
    }
};


void test1()
{
    int size = 1<<10;
    TNL::Containers::Array<int, Device> cudaArr(size);
    cudaArr.evaluate([=] __cuda_callable__ (int i) {return i;});
    auto view = cudaArr.getView();
            arr = vec;
            auto view = arr.getView();

            {
        TIMER t("sorted sequences");
                TIMER t([&](double res){resAcc.push_back(res);});
                bitonicSort(view);
            }
        }

void randomShuffles()
{
    int iterations = 100;
    std::cout << iterations << " random permutations" << std::endl;
    for(int p = 13; p <= 19; ++p)
    {
        int size = 1<<p;
        std::vector<int> orig(size);
        std::iota(orig.begin(), orig.end(), 0);
        std::vector<double> results;

        for (int i = 0; i < iterations; i++)
        //decreasing sequence
        {
            std::random_shuffle(orig.begin(), orig.end());
            for(size_t i = 0; i < size; i++)
                vec[i] = -i;
                
            TNL::Containers::Array<int, Device> cudaArr(orig);
            auto view = cudaArr.getView();
            std::vector<int> tmp(orig.begin(), orig.end());
            arr = vec;
            auto view = arr.getView();

            {
                TIMER t("random permutation");

                //std::sort(tmp.begin(), tmp.end());
                TIMER t([&](double res){resAcc.push_back(res);});
                bitonicSort(view);
                
                results.push_back(t.stop());
                //t.printTime();
            }

        }
        std::cout << "average time for arrSize = 2^" << p << ": " << std::accumulate(results.begin(), results.end(), 0.)/results.size() << " ms" << std::endl;

            }
        }
        
void allPermutations(std::vector<int> orig)
        //random sequence
        {
    std::vector<double> results;
    while (std::next_permutation(orig.begin(), orig.end()))
    {
        TNL::Containers::Array<int, Device> cudaArr(orig);
        auto view = cudaArr.getView();
            random_shuffle(vec.begin(), vec.end());

            arr = vec;
            auto view = arr.getView();

            {
            TIMER t("random permutation");
                TIMER t([&](double res){resAcc.push_back(res);});
                bitonicSort(view);
            results.push_back(t.stop());
            //t.printTime();
        }
            }
    std::cout << "average time: " << std::accumulate(results.begin(), results.end(), 0.)/results.size() << " ms" << std::endl;
        }


int main()
{
    randomShuffles();
        cout << "2^" << pow << " = ";
        cout << fixed;
        cout << setprecision(3);
        cout << (accumulate(resAcc.begin(), resAcc.end(), 0.0) / resAcc.size()) << " ms" << endl;
    }

    return 0;
}
 No newline at end of file
+27 −56
Original line number Diff line number Diff line
@@ -7,27 +7,19 @@
#include <TNL/Containers/Array.h>
#include <TNL/Algorithms/MemoryOperations.h>
#include "../bitonicSort.h"

/**
 * Checks whether the elements referenced by a CUDA array view are in
 * non-decreasing order (the ordering std::is_sorted tests by default).
 *
 * The check runs on the host: the view's contents are first copied into a
 * temporary std::vector of the same size, then std::is_sorted is applied.
 *
 * @param arr view whose getData()/getSize() describe the elements to check
 * @return result of std::is_sorted over the copied elements
 */
template <typename Value>
bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr)
{
    // Staging buffer sized to hold every element of the view.
    std::vector<Value> tmp(arr.getSize());
    // Fill the staging buffer from the view's storage (Host/Cuda memory operations).
    TNL::Algorithms::MultiDeviceMemoryOperations<TNL::Devices::Host, TNL::Devices::Cuda >::copy(tmp.data(), arr.getData(), arr.getSize());

    return std::is_sorted(tmp.begin(), tmp.end());
}
#include "../../util/algorithm.h"

//----------------------------------------------------------------------------------

TEST(permutations, allPermutationSize_3_to_7)
TEST(permutations, allPermutationSize_1_to_8)
{
    for(int i = 3; i<=7; i++ )
    for(int i = 2; i<=8; i++ )
    {
        int size = i;
        std::vector<int> orig(size);
        std::iota(orig.begin(), orig.end(), 0);

        while (std::next_permutation(orig.begin(), orig.end()))
        do
        {
            TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig);
            auto view = cudaArr.getView();
@@ -36,29 +28,7 @@ TEST(permutations, allPermutationSize_3_to_7)

            ASSERT_TRUE(is_sorted(view)) << "failed " << i << std::endl;
        } 
    }
}

TEST(permutations, somePermutationSize8)
{
    int size = 8;
    const int stride = 23;
    int i = 0;

    std::vector<int> orig(size);
    std::iota(orig.begin(), orig.end(), 0);

    while (std::next_permutation(orig.begin(), orig.end()))
    {
        if ((i++) % stride != 0)
            continue;

        TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig);
        auto view = cudaArr.getView();

        bitonicSort(view);

        ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
        while (std::next_permutation(orig.begin(), orig.end()));
    }
}

@@ -71,7 +41,7 @@ TEST(permutations, somePermutationSize9)
    std::vector<int> orig(size);
    std::iota(orig.begin(), orig.end(), 0);

    while (std::next_permutation(orig.begin(), orig.end()))
    do
    {
        if ((i++) % stride != 0)
            continue;
@@ -83,35 +53,43 @@ TEST(permutations, somePermutationSize9)

        ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
    }
    while (std::next_permutation(orig.begin(), orig.end()));
}

//-----------------------------------------------------------------------

TEST(selectedSize, size15)
{
    TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr{5, 9, 4, 8, 6, 1, 2, 3, 4, 8, 1, 6, 9, 4, 9};
    auto view = cudaArr.getView();
    ASSERT_EQ(15, view.getSize());
    ASSERT_EQ(15, view.getSize()) << "size not 15" << std::endl;
    bitonicSort(view);
    ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
}

TEST(multiblock, 32768_decreasingNegative)
{
    TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(1 << 15);
    for (int i = 0; i < cudaArr.getSize(); i++)
        cudaArr.setElement(i, -i);
    std::vector<int> arr(1<<15);
    for (size_t i = 0; i < arr.size(); i++)
        arr[i] = -i;
    
    TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(arr);
    auto view = cudaArr.getView();

    bitonicSort(view);
    ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
}

TEST(randomGenerated, smallArray_randomVal)
{
    std::srand(2006);
    for(int i = 0; i < 100; i++)
    {
        TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(std::rand()%(1<<10));
        for (int j = 0; j < cudaArr.getSize(); j++)
            cudaArr.setElement(j, std::rand());
        std::vector<int> arr(std::rand()%(1<<10));
        for(auto & x : arr)
            x = std::rand();

        TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(arr);

        auto view = cudaArr.getView();
        bitonicSort(view);
@@ -121,6 +99,7 @@ TEST(randomGenerated, smallArray_randomVal)

TEST(randomGenerated, bigArray_all0)
{
    std::srand(304);
    for(int i = 0; i < 50; i++)
    {
        int size = (1<<20) + (std::rand()% (1<<19));
@@ -149,31 +128,23 @@ TEST(nonIntegerType, double_notPow2)
    ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
}

/*

struct TMPSTRUCT{
    uint8_t m_data[6];
    TMPSTRUCT(){m_data[0] = 0;}
    TMPSTRUCT(int first){m_data[0] = first;};
    bool operator <(const TMPSTRUCT& other) const { return m_data[0] < other.m_data[0];}

    bool operator ==(const TMPSTRUCT& other) const {return !(*this < other) && !(other < *this); }

    bool operator >=(const TMPSTRUCT& other) const {return !(*this < other); }
    bool operator >(const TMPSTRUCT& other) const {return !(*this <= other); }
    bool operator <=(const TMPSTRUCT& other) const {return (*this < other) || (other == *this); }

    std::ostream& operator << (std::ostream & out) { return out << "{ " << m_data[0] << " }";}
    bool operator <=(const TMPSTRUCT& other) const { return m_data[0] <= other.m_data[0];}
};

TEST(nonIntegerType, struct)
{

    TNL::Containers::Array<TMPSTRUCT, TNL::Devices::Cuda> cudaArr{TMPSTRUCT(5), TMPSTRUCT(6), TMPSTRUCT(9), TMPSTRUCT(1)};
    auto view = cudaArr.getView();
    bitonicSort(view);
    ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl;
    ASSERT_TRUE(is_sorted(view));
}
*/


//error bypassing
//https://mmg-gitlab.fjfi.cvut.cz/gitlab/tnl/tnl-dev/blob/fbc34f6a97c13ec865ef7969b9704533222ed408/src/UnitTests/Containers/VectorTest-8.h

util/algorithm.h

0 → 100644
+19 −0
Original line number Diff line number Diff line
#pragma once
#include <TNL/Containers/Array.h>
#include <TNL/Algorithms/Reduction.h>

/**
 * Tests whether every adjacent pair of elements in a CUDA array view
 * satisfies the given ordering predicate.
 *
 * Views with at most one element are reported as sorted without launching a
 * reduction. Otherwise Cmp(arr[i - 1], arr[i]) is evaluated for the index
 * range handed to TNL's Reduction (begin = 1, end = arr.getSize()) and the
 * results are combined with logical AND, starting from `true`.
 *
 * @param arr view of the elements to inspect
 * @param Cmp binary predicate called as Cmp(previous, current); it is copied
 *            into the fetch lambda, so it must be usable from
 *            __cuda_callable__ code
 * @return the AND of all pairwise predicate results
 */
template <typename Value, typename Function>
bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr, const Function &Cmp)
{
    // Fewer than two elements: there is no adjacent pair to compare.
    if (arr.getSize() < 2)
        return true;

    // Combines two partial verdicts; a single failing pair makes the result false.
    auto bothHold = [] __cuda_callable__(bool lhs, bool rhs) { return lhs && rhs; };
    // Produces the verdict for the pair ending at position `pos`.
    auto pairInOrder = [=] __cuda_callable__(int pos) { return Cmp(arr[pos - 1], arr[pos]); };

    using DeviceReduction = TNL::Algorithms::Reduction<TNL::Devices::Cuda>;
    return DeviceReduction::reduce(1, arr.getSize(), bothHold, pairInOrder, true);
}

/**
 * Convenience overload: tests whether a CUDA array view is in non-decreasing
 * order, i.e. `a <= b` holds for each adjacent pair (a, b).
 *
 * Delegates to the two-argument is_sorted with a `<=` predicate.
 *
 * @param arr view of the elements to inspect
 * @return whatever the predicate-taking overload returns for `<=`
 */
template <typename Value>
bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr)
{
    auto nonDecreasing = [] __cuda_callable__(const Value &lhs, const Value &rhs) { return lhs <= rhs; };
    return is_sorted(arr, nonDecreasing);
}

util/timer.h

0 → 100644
+22 −0
Original line number Diff line number Diff line
#pragma once

#include <string>
#include <chrono>
#include <functional>
#include <iostream>

/**
 * Scope-based stopwatch.
 *
 * The clock starts when the object is constructed. When the object is
 * destroyed, the elapsed time in milliseconds (as a double) is passed to the
 * callback supplied at construction. The default callback prints the value
 * to std::cout followed by std::endl.
 *
 * Notes:
 *  - Time is taken from std::chrono::steady_clock so that wall-clock
 *    adjustments cannot distort (or make negative) the measured interval.
 *  - An empty callback is allowed; the destructor then reports nothing
 *    instead of throwing std::bad_function_call out of a destructor.
 *  - The type is non-copyable: a copy would report the same measurement a
 *    second time when it is destroyed.
 *  - The callback runs inside the destructor, so it should not throw.
 */
struct TIMER
{
    /// Receives the elapsed time in milliseconds on destruction; may be empty.
    std::function<void(double)> f;
    /// Instant at which this timer was constructed.
    std::chrono::steady_clock::time_point begin;

    /**
     * Starts the measurement.
     *
     * @param func callback invoked from the destructor with the elapsed
     *             milliseconds; defaults to printing the value to std::cout
     */
    TIMER(std::function<void(double)> func = [](double res){std::cout << res << std::endl;})
        : f(func), begin(std::chrono::steady_clock::now()) {}

    // One measurement must produce exactly one report.
    TIMER(const TIMER &) = delete;
    TIMER &operator=(const TIMER &) = delete;

    /// Stops the measurement and reports it through the callback, if any.
    ~TIMER()
    {
        // Sample the clock first so callback checks are not part of the measurement.
        const auto end = std::chrono::steady_clock::now();
        if (!f)
            return;

        // Floating-point milliseconds: no truncation to whole microseconds.
        const double result = std::chrono::duration<double, std::milli>(end - begin).count();
        f(result);
    }
};
 No newline at end of file