Loading bitonicGPU/benchmark/benchmark.cu +56 −96 Original line number Diff line number Diff line #include <string> #include <chrono> #include <iostream> #include <algorithm> #include <numeric> #include <iomanip> #include <TNL/Containers/Array.h> #include "../bitonicSort.h" #include "../../util/timer.h" using namespace TNL; using namespace TNL::Containers; using namespace std; typedef Devices::Cuda Device; template <class T> std::ostream& operator<< (std::ostream&out, std::vector<T> &arr) int main() { for (auto x : arr) std::cout << x << " "; return out; } struct TIMER srand(2021); for(int pow = 10; pow <= 20; pow++) { std::string s; std::chrono::steady_clock::time_point begin; double result = 0; bool stopped = false; int size =(1<< pow); vector<int> vec(size); iota(vec.begin(), vec.end(), 0); TIMER(const std::string &name = "") : s(name), begin(std::chrono::steady_clock::now()) {} Array<int, Device> arr; vector<double> resAcc; double stop() //sorted sequence { auto end = std::chrono::steady_clock::now(); result = (std::chrono::duration_cast<std::chrono::microseconds >(end - begin).count() / 1000.); stopped = true; return result; } arr = vec; auto view = arr.getView(); void printTime() { if(!stopped) stop(); std::cout << ("Measured " + s + ": ") << result << " ms" << std::endl; TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); } } ~TIMER() //almost sorted sequence { if(!stopped) for(int i = 0; i < 3; i++) { stop(); printTime(); int s = rand() % (size - 3); std::swap(vec[s], vec[s + 1]); } } }; void test1() { int size = 1<<10; TNL::Containers::Array<int, Device> cudaArr(size); cudaArr.evaluate([=] __cuda_callable__ (int i) {return i;}); auto view = cudaArr.getView(); arr = vec; auto view = arr.getView(); { TIMER t("sorted sequences"); TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); } } void randomShuffles() { int iterations = 100; std::cout << iterations << " random permutations" << std::endl; for(int p = 13; p <= 19; ++p) { 
int size = 1<<p; std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); std::vector<double> results; for (int i = 0; i < iterations; i++) //decreasing sequence { std::random_shuffle(orig.begin(), orig.end()); for(size_t i = 0; i < size; i++) vec[i] = -i; TNL::Containers::Array<int, Device> cudaArr(orig); auto view = cudaArr.getView(); std::vector<int> tmp(orig.begin(), orig.end()); arr = vec; auto view = arr.getView(); { TIMER t("random permutation"); //std::sort(tmp.begin(), tmp.end()); TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); results.push_back(t.stop()); //t.printTime(); } } std::cout << "average time for arrSize = 2^" << p << ": " << std::accumulate(results.begin(), results.end(), 0.)/results.size() << " ms" << std::endl; } } void allPermutations(std::vector<int> orig) //random sequence { std::vector<double> results; while (std::next_permutation(orig.begin(), orig.end())) { TNL::Containers::Array<int, Device> cudaArr(orig); auto view = cudaArr.getView(); random_shuffle(vec.begin(), vec.end()); arr = vec; auto view = arr.getView(); { TIMER t("random permutation"); TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); results.push_back(t.stop()); //t.printTime(); } } std::cout << "average time: " << std::accumulate(results.begin(), results.end(), 0.)/results.size() << " ms" << std::endl; } int main() { randomShuffles(); cout << "2^" << pow << " = "; cout << fixed; cout << setprecision(3); cout << (accumulate(resAcc.begin(), resAcc.end(), 0.0) / resAcc.size()) << " ms" << endl; } return 0; } No newline at end of file bitonicGPU/unitTests/unitTests.cu +27 −56 Original line number Diff line number Diff line Loading @@ -7,27 +7,19 @@ #include <TNL/Containers/Array.h> #include <TNL/Algorithms/MemoryOperations.h> #include "../bitonicSort.h" template <typename Value> bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr) { std::vector<Value> tmp(arr.getSize()); 
TNL::Algorithms::MultiDeviceMemoryOperations<TNL::Devices::Host, TNL::Devices::Cuda >::copy(tmp.data(), arr.getData(), arr.getSize()); return std::is_sorted(tmp.begin(), tmp.end()); } #include "../../util/algorithm.h" //---------------------------------------------------------------------------------- TEST(permutations, allPermutationSize_3_to_7) TEST(permutations, allPermutationSize_1_to_8) { for(int i = 3; i<=7; i++ ) for(int i = 2; i<=8; i++ ) { int size = i; std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); while (std::next_permutation(orig.begin(), orig.end())) do { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); Loading @@ -36,29 +28,7 @@ TEST(permutations, allPermutationSize_3_to_7) ASSERT_TRUE(is_sorted(view)) << "failed " << i << std::endl; } } } TEST(permutations, somePermutationSize8) { int size = 8; const int stride = 23; int i = 0; std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); while (std::next_permutation(orig.begin(), orig.end())) { if ((i++) % stride != 0) continue; TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; while (std::next_permutation(orig.begin(), orig.end())); } } Loading @@ -71,7 +41,7 @@ TEST(permutations, somePermutationSize9) std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); while (std::next_permutation(orig.begin(), orig.end())) do { if ((i++) % stride != 0) continue; Loading @@ -83,35 +53,43 @@ TEST(permutations, somePermutationSize9) ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } while (std::next_permutation(orig.begin(), orig.end())); } //----------------------------------------------------------------------- TEST(selectedSize, size15) { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr{5, 9, 4, 8, 6, 1, 2, 3, 4, 8, 1, 6, 9, 4, 9}; auto view = cudaArr.getView(); ASSERT_EQ(15, 
view.getSize()); ASSERT_EQ(15, view.getSize()) << "size not 15" << std::endl; bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } TEST(multiblock, 32768_decreasingNegative) { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(1 << 15); for (int i = 0; i < cudaArr.getSize(); i++) cudaArr.setElement(i, -i); std::vector<int> arr(1<<15); for (size_t i = 0; i < arr.size(); i++) arr[i] = -i; TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(arr); auto view = cudaArr.getView(); bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } TEST(randomGenerated, smallArray_randomVal) { std::srand(2006); for(int i = 0; i < 100; i++) { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(std::rand()%(1<<10)); for (int j = 0; j < cudaArr.getSize(); j++) cudaArr.setElement(j, std::rand()); std::vector<int> arr(std::rand()%(1<<10)); for(auto & x : arr) x = std::rand(); TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(arr); auto view = cudaArr.getView(); bitonicSort(view); Loading @@ -121,6 +99,7 @@ TEST(randomGenerated, smallArray_randomVal) TEST(randomGenerated, bigArray_all0) { std::srand(304); for(int i = 0; i < 50; i++) { int size = (1<<20) + (std::rand()% (1<<19)); Loading Loading @@ -149,31 +128,23 @@ TEST(nonIntegerType, double_notPow2) ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } /* struct TMPSTRUCT{ uint8_t m_data[6]; TMPSTRUCT(){m_data[0] = 0;} TMPSTRUCT(int first){m_data[0] = first;}; bool operator <(const TMPSTRUCT& other) const { return m_data[0] < other.m_data[0];} bool operator ==(const TMPSTRUCT& other) const {return !(*this < other) && !(other < *this); } bool operator >=(const TMPSTRUCT& other) const {return !(*this < other); } bool operator >(const TMPSTRUCT& other) const {return !(*this <= other); } bool operator <=(const TMPSTRUCT& other) const {return (*this < other) || (other == *this); } std::ostream& operator << (std::ostream & out) { return out << "{ " << 
m_data[0] << " }";} bool operator <=(const TMPSTRUCT& other) const { return m_data[0] <= other.m_data[0];} }; TEST(nonIntegerType, struct) { TNL::Containers::Array<TMPSTRUCT, TNL::Devices::Cuda> cudaArr{TMPSTRUCT(5), TMPSTRUCT(6), TMPSTRUCT(9), TMPSTRUCT(1)}; auto view = cudaArr.getView(); bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; ASSERT_TRUE(is_sorted(view)); } */ //error bypassing //https://mmg-gitlab.fjfi.cvut.cz/gitlab/tnl/tnl-dev/blob/fbc34f6a97c13ec865ef7969b9704533222ed408/src/UnitTests/Containers/VectorTest-8.h Loading util/algorithm.h 0 → 100644 +19 −0 Original line number Diff line number Diff line #pragma once #include <TNL/Containers/Array.h> #include <TNL/Algorithms/Reduction.h> template <typename Value, typename Function> bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr, const Function &Cmp) { if(arr.getSize() <= 1) return true; auto fetch = [=] __cuda_callable__(int i) { return Cmp(arr[i - 1], arr[i]); }; auto reduction = [] __cuda_callable__(bool a, bool b) { return a && b; }; return TNL::Algorithms::Reduction<TNL::Devices::Cuda>::reduce(1, arr.getSize(), reduction, fetch, true); } template <typename Value> bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr) { return is_sorted(arr, [] __cuda_callable__(const Value &a, const Value &b) { return a <= b; }); } util/timer.h 0 → 100644 +22 −0 Original line number Diff line number Diff line #pragma once #include <string> #include <chrono> #include <functional> #include <iostream> struct TIMER { std::function<void(double)> f; std::chrono::high_resolution_clock::time_point begin; TIMER(std::function<void(double)> func = [](double res){std::cout << res << std::endl;}) : f(func), begin(std::chrono::high_resolution_clock::now()) {} ~TIMER() { auto end = std::chrono::high_resolution_clock::now(); double result = (std::chrono::duration_cast<std::chrono::microseconds >(end - begin).count() / 1000.); f(result); } }; No newline at 
end of file Loading
bitonicGPU/benchmark/benchmark.cu +56 −96 Original line number Diff line number Diff line #include <string> #include <chrono> #include <iostream> #include <algorithm> #include <numeric> #include <iomanip> #include <TNL/Containers/Array.h> #include "../bitonicSort.h" #include "../../util/timer.h" using namespace TNL; using namespace TNL::Containers; using namespace std; typedef Devices::Cuda Device; template <class T> std::ostream& operator<< (std::ostream&out, std::vector<T> &arr) int main() { for (auto x : arr) std::cout << x << " "; return out; } struct TIMER srand(2021); for(int pow = 10; pow <= 20; pow++) { std::string s; std::chrono::steady_clock::time_point begin; double result = 0; bool stopped = false; int size =(1<< pow); vector<int> vec(size); iota(vec.begin(), vec.end(), 0); TIMER(const std::string &name = "") : s(name), begin(std::chrono::steady_clock::now()) {} Array<int, Device> arr; vector<double> resAcc; double stop() //sorted sequence { auto end = std::chrono::steady_clock::now(); result = (std::chrono::duration_cast<std::chrono::microseconds >(end - begin).count() / 1000.); stopped = true; return result; } arr = vec; auto view = arr.getView(); void printTime() { if(!stopped) stop(); std::cout << ("Measured " + s + ": ") << result << " ms" << std::endl; TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); } } ~TIMER() //almost sorted sequence { if(!stopped) for(int i = 0; i < 3; i++) { stop(); printTime(); int s = rand() % (size - 3); std::swap(vec[s], vec[s + 1]); } } }; void test1() { int size = 1<<10; TNL::Containers::Array<int, Device> cudaArr(size); cudaArr.evaluate([=] __cuda_callable__ (int i) {return i;}); auto view = cudaArr.getView(); arr = vec; auto view = arr.getView(); { TIMER t("sorted sequences"); TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); } } void randomShuffles() { int iterations = 100; std::cout << iterations << " random permutations" << std::endl; for(int p = 13; p <= 19; ++p) { int size 
= 1<<p; std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); std::vector<double> results; for (int i = 0; i < iterations; i++) //decreasing sequence { std::random_shuffle(orig.begin(), orig.end()); for(size_t i = 0; i < size; i++) vec[i] = -i; TNL::Containers::Array<int, Device> cudaArr(orig); auto view = cudaArr.getView(); std::vector<int> tmp(orig.begin(), orig.end()); arr = vec; auto view = arr.getView(); { TIMER t("random permutation"); //std::sort(tmp.begin(), tmp.end()); TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); results.push_back(t.stop()); //t.printTime(); } } std::cout << "average time for arrSize = 2^" << p << ": " << std::accumulate(results.begin(), results.end(), 0.)/results.size() << " ms" << std::endl; } } void allPermutations(std::vector<int> orig) //random sequence { std::vector<double> results; while (std::next_permutation(orig.begin(), orig.end())) { TNL::Containers::Array<int, Device> cudaArr(orig); auto view = cudaArr.getView(); random_shuffle(vec.begin(), vec.end()); arr = vec; auto view = arr.getView(); { TIMER t("random permutation"); TIMER t([&](double res){resAcc.push_back(res);}); bitonicSort(view); results.push_back(t.stop()); //t.printTime(); } } std::cout << "average time: " << std::accumulate(results.begin(), results.end(), 0.)/results.size() << " ms" << std::endl; } int main() { randomShuffles(); cout << "2^" << pow << " = "; cout << fixed; cout << setprecision(3); cout << (accumulate(resAcc.begin(), resAcc.end(), 0.0) / resAcc.size()) << " ms" << endl; } return 0; } No newline at end of file
bitonicGPU/unitTests/unitTests.cu +27 −56 Original line number Diff line number Diff line Loading @@ -7,27 +7,19 @@ #include <TNL/Containers/Array.h> #include <TNL/Algorithms/MemoryOperations.h> #include "../bitonicSort.h" template <typename Value> bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr) { std::vector<Value> tmp(arr.getSize()); TNL::Algorithms::MultiDeviceMemoryOperations<TNL::Devices::Host, TNL::Devices::Cuda >::copy(tmp.data(), arr.getData(), arr.getSize()); return std::is_sorted(tmp.begin(), tmp.end()); } #include "../../util/algorithm.h" //---------------------------------------------------------------------------------- TEST(permutations, allPermutationSize_3_to_7) TEST(permutations, allPermutationSize_1_to_8) { for(int i = 3; i<=7; i++ ) for(int i = 2; i<=8; i++ ) { int size = i; std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); while (std::next_permutation(orig.begin(), orig.end())) do { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); Loading @@ -36,29 +28,7 @@ TEST(permutations, allPermutationSize_3_to_7) ASSERT_TRUE(is_sorted(view)) << "failed " << i << std::endl; } } } TEST(permutations, somePermutationSize8) { int size = 8; const int stride = 23; int i = 0; std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); while (std::next_permutation(orig.begin(), orig.end())) { if ((i++) % stride != 0) continue; TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(orig); auto view = cudaArr.getView(); bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; while (std::next_permutation(orig.begin(), orig.end())); } } Loading @@ -71,7 +41,7 @@ TEST(permutations, somePermutationSize9) std::vector<int> orig(size); std::iota(orig.begin(), orig.end(), 0); while (std::next_permutation(orig.begin(), orig.end())) do { if ((i++) % stride != 0) continue; Loading @@ -83,35 +53,43 @@ TEST(permutations, somePermutationSize9) 
ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } while (std::next_permutation(orig.begin(), orig.end())); } //----------------------------------------------------------------------- TEST(selectedSize, size15) { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr{5, 9, 4, 8, 6, 1, 2, 3, 4, 8, 1, 6, 9, 4, 9}; auto view = cudaArr.getView(); ASSERT_EQ(15, view.getSize()); ASSERT_EQ(15, view.getSize()) << "size not 15" << std::endl; bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } TEST(multiblock, 32768_decreasingNegative) { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(1 << 15); for (int i = 0; i < cudaArr.getSize(); i++) cudaArr.setElement(i, -i); std::vector<int> arr(1<<15); for (size_t i = 0; i < arr.size(); i++) arr[i] = -i; TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(arr); auto view = cudaArr.getView(); bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } TEST(randomGenerated, smallArray_randomVal) { std::srand(2006); for(int i = 0; i < 100; i++) { TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(std::rand()%(1<<10)); for (int j = 0; j < cudaArr.getSize(); j++) cudaArr.setElement(j, std::rand()); std::vector<int> arr(std::rand()%(1<<10)); for(auto & x : arr) x = std::rand(); TNL::Containers::Array<int, TNL::Devices::Cuda> cudaArr(arr); auto view = cudaArr.getView(); bitonicSort(view); Loading @@ -121,6 +99,7 @@ TEST(randomGenerated, smallArray_randomVal) TEST(randomGenerated, bigArray_all0) { std::srand(304); for(int i = 0; i < 50; i++) { int size = (1<<20) + (std::rand()% (1<<19)); Loading Loading @@ -149,31 +128,23 @@ TEST(nonIntegerType, double_notPow2) ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; } /* struct TMPSTRUCT{ uint8_t m_data[6]; TMPSTRUCT(){m_data[0] = 0;} TMPSTRUCT(int first){m_data[0] = first;}; bool operator <(const TMPSTRUCT& other) const { return m_data[0] < other.m_data[0];} bool operator ==(const TMPSTRUCT& 
other) const {return !(*this < other) && !(other < *this); } bool operator >=(const TMPSTRUCT& other) const {return !(*this < other); } bool operator >(const TMPSTRUCT& other) const {return !(*this <= other); } bool operator <=(const TMPSTRUCT& other) const {return (*this < other) || (other == *this); } std::ostream& operator << (std::ostream & out) { return out << "{ " << m_data[0] << " }";} bool operator <=(const TMPSTRUCT& other) const { return m_data[0] <= other.m_data[0];} }; TEST(nonIntegerType, struct) { TNL::Containers::Array<TMPSTRUCT, TNL::Devices::Cuda> cudaArr{TMPSTRUCT(5), TMPSTRUCT(6), TMPSTRUCT(9), TMPSTRUCT(1)}; auto view = cudaArr.getView(); bitonicSort(view); ASSERT_TRUE(is_sorted(view)) << "result " << view << std::endl; ASSERT_TRUE(is_sorted(view)); } */ //error bypassing //https://mmg-gitlab.fjfi.cvut.cz/gitlab/tnl/tnl-dev/blob/fbc34f6a97c13ec865ef7969b9704533222ed408/src/UnitTests/Containers/VectorTest-8.h Loading
util/algorithm.h 0 → 100644 +19 −0 Original line number Diff line number Diff line #pragma once #include <TNL/Containers/Array.h> #include <TNL/Algorithms/Reduction.h> template <typename Value, typename Function> bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr, const Function &Cmp) { if(arr.getSize() <= 1) return true; auto fetch = [=] __cuda_callable__(int i) { return Cmp(arr[i - 1], arr[i]); }; auto reduction = [] __cuda_callable__(bool a, bool b) { return a && b; }; return TNL::Algorithms::Reduction<TNL::Devices::Cuda>::reduce(1, arr.getSize(), reduction, fetch, true); } template <typename Value> bool is_sorted(TNL::Containers::ArrayView<Value, TNL::Devices::Cuda> arr) { return is_sorted(arr, [] __cuda_callable__(const Value &a, const Value &b) { return a <= b; }); }
// util/timer.h
#pragma once
#include <string>
#include <chrono>
#include <functional>
#include <iostream>
#include <utility>

/**
 * Scope timer: records the time of construction and, on destruction, passes
 * the elapsed time in milliseconds to a callback.
 *
 * The elapsed time is truncated to whole microseconds before being converted
 * to milliseconds. By default the callback prints the value to std::cout.
 *
 * NOTE(review): only host-side clock time is measured; if the timed work is
 * asynchronous with respect to the host, confirm it has completed before the
 * timer leaves scope.
 */
struct TIMER
{
    /// Receives the elapsed time in milliseconds when the timer is destroyed.
    std::function<void(double)> f;
    /// Time point captured at construction.
    std::chrono::high_resolution_clock::time_point begin;

    /**
     * Starts the timer.
     *
     * @param func  callback invoked once from the destructor with the elapsed
     *              milliseconds; an empty std::function disables reporting
     */
    TIMER(std::function<void(double)> func = [](double res){std::cout << res << std::endl;})
        : f(std::move(func)), begin(std::chrono::high_resolution_clock::now()) {}

    // The destructor has a side effect, so a copy would report the same
    // measurement twice. Forbid copying (this also suppresses implicit moves).
    TIMER(const TIMER &) = delete;
    TIMER &operator=(const TIMER &) = delete;

    /// Stops the timer and reports the elapsed milliseconds to the callback.
    ~TIMER()
    {
        const auto end = std::chrono::high_resolution_clock::now();
        const double result =
            std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count() / 1000.;

        // Calling an empty std::function throws std::bad_function_call; from a
        // destructor that would terminate the program, so skip it instead.
        if (f)
            f(result);
    }
};