Loading benchmark/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ endfunction() add_benchmark_project(bwtree) add_benchmark_project(owg) add_benchmark_project(cuco) add_benchmark_project(palm) add_benchmark_project(std) add_benchmark_project(stx) Loading benchmark/_common/benchmark.hpp +30 −0 Original line number Diff line number Diff line Loading @@ -13,6 +13,8 @@ #include <utility> #include <vector> #include "zipf.hpp" namespace Benchmark { namespace Device { Loading Loading @@ -47,6 +49,29 @@ template <typename Type> std::vector<Type> shuffle(int size) { return res; } template <typename Type> std::vector<Type> gaussian(int size) { std::vector<Type> res(size); std::random_device rd; std::mt19937 g(rd()); std::normal_distribution<> dg(size, size / 2.0); for (auto i = 0; i < size; ++i) { res[i] = std::abs(dg(g)); } return res; } template <typename Type> std::vector<Type> zipf(int size) { std::vector<Type> res(size); Zipf<Type> zipf((uint64_t)size, 0.98, (uint64_t)time(nullptr)); for (int i = 0; i < size; i++) { res.push_back(zipf.next()); } return res; } template <typename Type> std::vector<Type> almostSorted(int size, int swapCount = 5) { std::vector<Type> res(size); Loading Loading @@ -173,6 +198,11 @@ void execute(const std::string name, Code &&code, int from = 10, int to = 17, execRun<Device>("shuffle", code, attempts, row, Generators::shuffle<Type>(size)); execRun<Device>("gaussian", code, attempts, row, Generators::gaussian<Type>(size)); execRun<Device>("zipf", code, attempts, row, Generators::zipf<Type>(size)); execRun<Device>("almostSorted", code, attempts, row, Generators::almostSorted<Type>(size)); Loading benchmark/_common/zipf.hpp 0 → 100644 +141 −0 Original line number Diff line number Diff line #pragma once #include <algorithm> #include <cassert> #include <cstdint> #include <cstdio> #include <cstdlib> #include <random> #include <set> #include <vector> #include <cmath> #include <cstdio> #include <cstdlib> 
#include <cstring> template <typename T> class Zipf { uint64_t n; double mTheta; double mAlpha; double mThres; uint64_t mLastN; double mDblN; double mZetan; double mEta; uint64_t mRandState; static inline double fastRandD(uint64_t *state) { *state = (*state * 0x5deece66dUL + 0xbUL) & ((1UL << 48) - 1); return (double)*state / (double)((1UL << 48) - 1); } // http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/ static inline double powApprox(double a, double b) { union { double d; int x[2]; } u = {a}; u.x[1] = (int)(b * (u.x[1] - 1072632447) + 1072632447); u.x[0] = 0; return u.d; } static inline double zeta(uint64_t lastN, double lastSum, uint64_t n, double theta) { if (lastN > n) { lastN = 0; lastSum = 0.; } while (lastN < n) { lastSum += 1. / powApprox((double)lastN + 1., theta); lastN++; } return lastSum; } public: Zipf(uint64_t n, double theta, uint64_t randSeed) { assert(n > 0); if (theta > 0.992 && theta < 1) { fprintf(stderr, "theta > 0.992 will be inaccurate due to approximation\n"); } if (theta >= 1. && theta < 40.) { fprintf(stderr, "theta in [1., 40.) is not supported\n"); assert(false); } assert(theta == -1. || (theta >= 0. && theta < 1.) || theta >= 40.); assert(randSeed < (1UL << 48)); memset(this, 0, sizeof(*this)); this->n = n; this->mTheta = theta; if (theta == -1.) { randSeed = randSeed % n; } else if (theta > 0. && theta < 1.) { this->mAlpha = 1. / (1. - theta); this->mThres = 1. + powApprox(0.5, theta); } else { this->mAlpha = 0.; this->mThres = 0.; } this->mLastN = 0; this->mZetan = 0.; this->mRandState = randSeed; } T next() { if (this->mLastN != this->n) { if (this->mTheta > 0. && this->mTheta < 1.) { this->mZetan = zeta(this->mLastN, this->mZetan, this->n, this->mTheta); this->mEta = (1. - powApprox(2. / (double)this->n, 1. - this->mTheta)) / (1. - zeta(0, 0., 2, this->mTheta) / this->mZetan); } this->mLastN = this->n; this->mDblN = (double)this->n; } if (this->mTheta == -1.) 
{ T v = this->mRandState; if (++this->mRandState >= this->n) { this->mRandState = 0; } return v; } if (this->mTheta == 0.) { double u = fastRandD(&this->mRandState); return (T)(this->mDblN * u); } if (this->mTheta >= 40.) { return 0; } // from J. Gray et al. Quickly generating billion-record synthetic // databases. In SIGMOD, 1994. // double u = erand48(this->rand_state); double u = fastRandD(&this->mRandState); double uz = u * this->mZetan; if (uz < 1.) { return 0; } if (uz < this->mThres) { return 1; } return (T)(this->mDblN * powApprox(this->mEta * (u - 1.) + 1., this->mAlpha)); } }; cuCollections @ 0b672bbd Compare 0b672bbd to 0b672bbd Original line number Diff line number Diff line Subproject commit 0b672bbde7c85a79df4d7ca5f82e15e5b4a57700 benchmark/tnl/tnl_cuda.cu +8 −5 Original line number Diff line number Diff line #include "../_common/benchmark.hpp" #include "../../implementation/BPlusTree/src/BTreeContainer/Default.hpp" #include "../../implementation/src/BPlusTree/Default.hpp" #include "../../implementation/src/BTreeContainer/Default.hpp" #include <TNL/Assert.h> #include <TNL/Containers/Array.h> Loading @@ -15,15 +16,16 @@ using namespace TNL::Containers; int main(int argc, char **argv) { using KeyType = uint32_t; using ValueType = uint32_t; const size_t KeyInf = UINT32_MAX; const size_t Order = 15; Benchmark::execute<Benchmark::Device::Cuda, KeyType>( "tnl::cuda", [](auto &timer, std::vector<KeyType> input) { using KeyArray = Containers::Array<ValueType, TNL::Devices::Cuda>; using ValueArray = Containers::Array<ValueType, TNL::Devices::Cuda>; using Tree = BTreeContainer<KeyType, ValueType, Order, KeyInf, TNL::Devices::Cuda>; using MaskArray = Containers::Array<bool, TNL::Devices::Cuda>; using Tree = BTreeContainer<KeyType, ValueType, Order, TNL::Devices::Cuda, BPlusTree>; Tree container(getContainerSize(input.size(), Order)); Loading @@ -37,9 +39,10 @@ int main(int argc, char **argv) { timer.stop("insert"); ValueArray results(input.size()); MaskArray 
mask(input.size()); timer.start(); container.find(keys, results); container.find(keys, results, mask); timer.stop("query"); }); Loading Loading
benchmark/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ endfunction() add_benchmark_project(bwtree) add_benchmark_project(owg) add_benchmark_project(cuco) add_benchmark_project(palm) add_benchmark_project(std) add_benchmark_project(stx) Loading
benchmark/_common/benchmark.hpp +30 −0 Original line number Diff line number Diff line Loading @@ -13,6 +13,8 @@ #include <utility> #include <vector> #include "zipf.hpp" namespace Benchmark { namespace Device { Loading Loading @@ -47,6 +49,29 @@ template <typename Type> std::vector<Type> shuffle(int size) { return res; } template <typename Type> std::vector<Type> gaussian(int size) { std::vector<Type> res(size); std::random_device rd; std::mt19937 g(rd()); std::normal_distribution<> dg(size, size / 2.0); for (auto i = 0; i < size; ++i) { res[i] = std::abs(dg(g)); } return res; } template <typename Type> std::vector<Type> zipf(int size) { std::vector<Type> res(size); Zipf<Type> zipf((uint64_t)size, 0.98, (uint64_t)time(nullptr)); for (int i = 0; i < size; i++) { res.push_back(zipf.next()); } return res; } template <typename Type> std::vector<Type> almostSorted(int size, int swapCount = 5) { std::vector<Type> res(size); Loading Loading @@ -173,6 +198,11 @@ void execute(const std::string name, Code &&code, int from = 10, int to = 17, execRun<Device>("shuffle", code, attempts, row, Generators::shuffle<Type>(size)); execRun<Device>("gaussian", code, attempts, row, Generators::gaussian<Type>(size)); execRun<Device>("zipf", code, attempts, row, Generators::zipf<Type>(size)); execRun<Device>("almostSorted", code, attempts, row, Generators::almostSorted<Type>(size)); Loading
// benchmark/_common/zipf.hpp
#pragma once

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <random>
#include <set>
#include <vector>

/**
 * Pseudo-random key generator whose behaviour is selected by `theta`:
 *
 *   theta == -1      round-robin: randSeed % n, then +1 each call, wrapping
 *                    to 0 once n is reached
 *   theta == 0       uniform: n * u, where u comes from fastRandD()
 *   0 < theta < 1    approximate Zipf distribution (J. Gray et al., "Quickly
 *                    generating billion-record synthetic databases", SIGMOD
 *                    1994), evaluated with the approximate pow() below
 *   theta >= 40      always 0
 *
 * Values of theta in [1, 40) are rejected by the constructor.
 *
 * @tparam T type of the generated values; every result is converted to it
 */
template <typename T> class Zipf {
  // Modulus mask, multiplier and increment of the 48-bit linear congruential
  // generator (the constants used by the POSIX drand48 family). They are
  // spelled as uint64_t because shifting an `unsigned long` by 48 is
  // undefined on platforms where that type is only 32 bits wide.
  static constexpr uint64_t kStateMask = (uint64_t{1} << 48) - 1;
  static constexpr uint64_t kMultiplier = 0x5deece66dULL;
  static constexpr uint64_t kIncrement = 0xbULL;

  uint64_t n = 0;          // size of the key domain
  double mTheta = 0.;      // mode selector / skew, see class comment
  double mAlpha = 0.;      // 1 / (1 - theta), only for 0 < theta < 1
  double mThres = 0.;      // 1 + 0.5^theta, only for 0 < theta < 1
  uint64_t mLastN = 0;     // n the cached values below were computed for
  double mDblN = 0.;       // n as double
  double mZetan = 0.;      // cached zeta(n, theta)
  double mEta = 0.;        // cached eta term of Gray's formula
  uint64_t mRandState = 0; // LCG state, or next value in round-robin mode

  /**
   * Advances the 48-bit LCG stored in `*state` and returns the new state
   * scaled to [0, 1] (1 is reached only when the state equals the mask).
   */
  static inline double fastRandD(uint64_t *state) {
    *state = (*state * kMultiplier + kIncrement) & kStateMask;
    return static_cast<double>(*state) / static_cast<double>(kStateMask);
  }

  /**
   * Fast approximation of pow(a, b).
   *
   * http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/
   *
   * The trick rescales the upper 32 bits of the IEEE-754 representation of
   * `a` and clears the lower 32 bits. The upper half is taken from index 1,
   * i.e. a little-endian layout is assumed. The bytes are moved with memcpy
   * because reading the inactive member of a union is undefined behaviour in
   * C++.
   */
  static inline double powApprox(double a, double b) {
    static_assert(sizeof(double) == 2 * sizeof(int32_t),
                  "powApprox expects a 64-bit double");

    int32_t words[2];
    std::memcpy(words, &a, sizeof(a));

    words[1] =
        static_cast<int32_t>(b * (words[1] - 1072632447) + 1072632447);
    words[0] = 0;

    double result;
    std::memcpy(&result, words, sizeof(result));
    return result;
  }

  /**
   * Extends the partial sum of 1 / i^theta from `lastN` terms to `n` terms.
   *
   * @param lastN   number of terms already contained in `lastSum`
   * @param lastSum partial sum over the first `lastN` terms
   * @param n       number of terms wanted; when it is smaller than `lastN`
   *                the sum is recomputed from scratch
   * @param theta   exponent
   * @return sum over i = 1..n of 1 / powApprox(i, theta)
   */
  static inline double zeta(uint64_t lastN, double lastSum, uint64_t n,
                            double theta) {
    if (lastN > n) {
      lastN = 0;
      lastSum = 0.;
    }
    while (lastN < n) {
      lastSum += 1. / powApprox(static_cast<double>(lastN) + 1., theta);
      lastN++;
    }
    return lastSum;
  }

public:
  /**
   * @param n        size of the key domain, must be > 0
   * @param theta    mode selector / skew, see class comment; a warning is
   *                 printed to stderr for 0.992 < theta < 1
   * @param randSeed initial generator state, must be < 2^48; in round-robin
   *                 mode the sequence starts at randSeed % n
   */
  Zipf(uint64_t n, double theta, uint64_t randSeed) {
    assert(n > 0);
    if (theta > 0.992 && theta < 1) {
      fprintf(stderr, "theta > 0.992 will be inaccurate due to approximation\n");
    }
    if (theta >= 1. && theta < 40.) {
      fprintf(stderr, "theta in [1., 40.) is not supported\n");
      assert(false);
    }
    assert(theta == -1. || (theta >= 0. && theta < 1.) || theta >= 40.);
    assert(randSeed < (uint64_t{1} << 48));

    // Every member not assigned below keeps its zero default initialiser.
    this->n = n;
    this->mTheta = theta;

    if (theta == -1.) {
      randSeed = randSeed % n;
    } else if (theta > 0. && theta < 1.) {
      this->mAlpha = 1. / (1. - theta);
      this->mThres = 1. + powApprox(0.5, theta);
    }

    this->mRandState = randSeed;
  }

  /**
   * Returns the next generated value, see the class comment for the modes.
   *
   * The first call (mLastN still differs from n) computes and caches the
   * zeta and eta terms, which costs n evaluations of powApprox when
   * 0 < theta < 1.
   */
  T next() {
    if (this->mLastN != this->n) {
      if (this->mTheta > 0. && this->mTheta < 1.) {
        this->mZetan =
            zeta(this->mLastN, this->mZetan, this->n, this->mTheta);
        this->mEta =
            (1. - powApprox(2. / static_cast<double>(this->n),
                            1. - this->mTheta)) /
            (1. - zeta(0, 0., 2, this->mTheta) / this->mZetan);
      }
      this->mLastN = this->n;
      this->mDblN = static_cast<double>(this->n);
    }

    if (this->mTheta == -1.) {
      // Round-robin: hand out the current state, then step and wrap.
      const T v = static_cast<T>(this->mRandState);
      if (++this->mRandState >= this->n) {
        this->mRandState = 0;
      }
      return v;
    }

    if (this->mTheta == 0.) {
      const double u = fastRandD(&this->mRandState);
      return static_cast<T>(this->mDblN * u);
    }

    if (this->mTheta >= 40.) {
      return 0;
    }

    // from J. Gray et al. Quickly generating billion-record synthetic
    // databases. In SIGMOD, 1994.
    const double u = fastRandD(&this->mRandState);
    const double uz = u * this->mZetan;
    if (uz < 1.) {
      return 0;
    }
    if (uz < this->mThres) {
      return 1;
    }
    return static_cast<T>(
        this->mDblN * powApprox(this->mEta * (u - 1.) + 1., this->mAlpha));
  }
};
cuCollections @ 0b672bbd Compare 0b672bbd to 0b672bbd Original line number Diff line number Diff line Subproject commit 0b672bbde7c85a79df4d7ca5f82e15e5b4a57700
benchmark/tnl/tnl_cuda.cu +8 −5 Original line number Diff line number Diff line #include "../_common/benchmark.hpp" #include "../../implementation/BPlusTree/src/BTreeContainer/Default.hpp" #include "../../implementation/src/BPlusTree/Default.hpp" #include "../../implementation/src/BTreeContainer/Default.hpp" #include <TNL/Assert.h> #include <TNL/Containers/Array.h> Loading @@ -15,15 +16,16 @@ using namespace TNL::Containers; int main(int argc, char **argv) { using KeyType = uint32_t; using ValueType = uint32_t; const size_t KeyInf = UINT32_MAX; const size_t Order = 15; Benchmark::execute<Benchmark::Device::Cuda, KeyType>( "tnl::cuda", [](auto &timer, std::vector<KeyType> input) { using KeyArray = Containers::Array<ValueType, TNL::Devices::Cuda>; using ValueArray = Containers::Array<ValueType, TNL::Devices::Cuda>; using Tree = BTreeContainer<KeyType, ValueType, Order, KeyInf, TNL::Devices::Cuda>; using MaskArray = Containers::Array<bool, TNL::Devices::Cuda>; using Tree = BTreeContainer<KeyType, ValueType, Order, TNL::Devices::Cuda, BPlusTree>; Tree container(getContainerSize(input.size(), Order)); Loading @@ -37,9 +39,10 @@ int main(int argc, char **argv) { timer.stop("insert"); ValueArray results(input.size()); MaskArray mask(input.size()); timer.start(); container.find(keys, results); container.find(keys, results, mask); timer.stop("query"); }); Loading