chore: add cuco in cmake, zipf and gaussian distribution (6208e646) · Commits · TNL / GPUBTree

benchmark/CMakeLists.txt

+1 −0

Original line number	Diff line number	Diff line
		@@ -15,6 +15,7 @@ endfunction()

		add_benchmark_project(bwtree)
		add_benchmark_project(owg)
		add_benchmark_project(cuco)
		add_benchmark_project(palm)
		add_benchmark_project(std)
		add_benchmark_project(stx)

benchmark/_common/benchmark.hpp

+30 −0

Original line number	Diff line number	Diff line
		@@ -13,6 +13,8 @@
		#include <utility>
		#include <vector>

		#include "zipf.hpp"

		namespace Benchmark {

		namespace Device {
		@@ -47,6 +49,29 @@ template <typename Type> std::vector<Type> shuffle(int size) {
		return res;
		}

		/**
		 * Builds a vector of `size` keys drawn from a normal distribution.
		 *
		 * The distribution is centred on `size` with a standard deviation of
		 * `size / 2`; the absolute value of every sample is stored so negative
		 * draws are folded back onto the non-negative side. The engine is seeded
		 * from std::random_device, so the output differs between calls.
		 *
		 * @tparam Type element type of the returned vector
		 * @param size number of keys to generate
		 * @return vector holding `size` generated keys
		 */
		template <typename Type> std::vector<Type> gaussian(int size) {
		  std::vector<Type> res(size);

		  std::random_device seedSource;
		  std::mt19937 engine(seedSource());
		  std::normal_distribution<> bell(size, size / 2.0);

		  // One draw per slot, in index order.
		  for (auto &&slot : res) {
		    slot = std::abs(bell(engine));
		  }
		  return res;
		}

		template <typename Type> std::vector<Type> zipf(int size) {
		std::vector<Type> res(size);

		Zipf<Type> zipf((uint64_t)size, 0.98, (uint64_t)time(nullptr));
		for (int i = 0; i < size; i++) {
		res.push_back(zipf.next());
		}

		return res;
		}

		template <typename Type>
		std::vector<Type> almostSorted(int size, int swapCount = 5) {
		std::vector<Type> res(size);
		@@ -173,6 +198,11 @@ void execute(const std::string name, Code &&code, int from = 10, int to = 17,
		execRun<Device>("shuffle", code, attempts, row,
		Generators::shuffle<Type>(size));

		execRun<Device>("gaussian", code, attempts, row,
		Generators::gaussian<Type>(size));

		execRun<Device>("zipf", code, attempts, row, Generators::zipf<Type>(size));

		execRun<Device>("almostSorted", code, attempts, row,
		Generators::almostSorted<Type>(size));

benchmark/_common/zipf.hpp

0 → 100644

+141 −0

Original line number	Diff line number	Diff line
		#pragma once

		#include <algorithm>
		#include <cassert>
		#include <cstdint>
		#include <cstdio>
		#include <cstdlib>
		#include <random>
		#include <set>
		#include <vector>

		#include <cmath>
		#include <cstdio>
		#include <cstdlib>
		#include <cstring>

		/**
		 * Generator of pseudo-random item indices with a configurable skew.
		 *
		 * The behaviour of next() depends on `theta`:
		 *  - theta == -1      : sequential indices, starting at `randSeed % n`
		 *                       and wrapping to 0 after n - 1
		 *  - theta == 0       : uniform draw scaled by n
		 *  - 0 < theta < 1    : approximate Zipf draw (J. Gray et al., SIGMOD
		 *                       1994) using an approximated pow()
		 *  - theta >= 40      : always 0
		 *  - theta in [1, 40) : unsupported, rejected by an assertion
		 *
		 * @tparam T type of the values returned by next()
		 */
		template <typename T> class Zipf {
		  static_assert(sizeof(double) == sizeof(uint64_t),
		                "powApprox relies on a 64-bit double");

		  // Number of items.
		  uint64_t n = 0;

		  double mTheta = 0.;
		  double mAlpha = 0.;
		  double mThres = 0.;

		  // Item count for which mZetan / mEta / mDblN were last computed.
		  uint64_t mLastN = 0;

		  double mDblN = 0.;
		  double mZetan = 0.;
		  double mEta = 0.;
		  // 48-bit generator state, or the next index when theta == -1.
		  uint64_t mRandState = 0;

		  // Advances the 48-bit linear congruential state in place and returns
		  // the new state scaled by the largest 48-bit value.
		  static inline double fastRandD(uint64_t *state) {
		    constexpr uint64_t mask = (uint64_t{1} << 48) - 1;
		    *state = (*state * 0x5deece66dULL + 0xbULL) & mask;
		    return static_cast<double>(*state) / static_cast<double>(mask);
		  }

		  // http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/
		  // Approximates pow(a, b) by scaling the upper 32 bits of the double and
		  // clearing the lower 32 bits. The bits are moved with memcpy so the
		  // result does not depend on union punning or on byte order.
		  static inline double powApprox(double a, double b) {
		    uint64_t bits;
		    std::memcpy(&bits, &a, sizeof bits);

		    const int32_t high = static_cast<int32_t>(bits >> 32);
		    const int32_t scaled =
		        static_cast<int32_t>(b * (high - 1072632447) + 1072632447);
		    bits = static_cast<uint64_t>(static_cast<uint32_t>(scaled)) << 32;

		    double result;
		    std::memcpy(&result, &bits, sizeof result);
		    return result;
		  }

		  // Extends the partial sum of 1 / i^theta from `lastN` terms (whose sum
		  // is `lastSum`) to `n` terms; restarts from zero when lastN > n.
		  static inline double zeta(uint64_t lastN, double lastSum, uint64_t n,
		                            double theta) {
		    if (lastN > n) {
		      lastN = 0;
		      lastSum = 0.;
		    }

		    while (lastN < n) {
		      lastSum += 1. / powApprox(static_cast<double>(lastN) + 1., theta);
		      lastN++;
		    }
		    return lastSum;
		  }

		public:
		  /**
		   * @param n number of items, must be greater than 0
		   * @param theta skew selector, see the class description
		   * @param randSeed initial generator state, must be below 2^48
		   */
		  Zipf(uint64_t n, double theta, uint64_t randSeed) {
		    assert(n > 0);
		    if (theta > 0.992 && theta < 1) {
		      fprintf(stderr,
		              "theta > 0.992 will be inaccurate due to approximation\n");
		    }

		    if (theta >= 1. && theta < 40.) {
		      fprintf(stderr, "theta in [1., 40.) is not supported\n");
		      assert(false);
		    }

		    assert(theta == -1. || (theta >= 0. && theta < 1.) || theta >= 40.);
		    assert(randSeed < (uint64_t{1} << 48));

		    this->n = n;
		    this->mTheta = theta;

		    if (theta == -1.) {
		      // Sequential mode: the state is the next index to hand out.
		      randSeed = randSeed % n;
		    } else if (theta > 0. && theta < 1.) {
		      this->mAlpha = 1. / (1. - theta);
		      this->mThres = 1. + powApprox(0.5, theta);
		    } else {
		      this->mAlpha = 0.;
		      this->mThres = 0.;
		    }

		    // mLastN == 0 != n forces next() to compute the cached terms on its
		    // first call.
		    this->mLastN = 0;
		    this->mZetan = 0.;
		    this->mRandState = randSeed;
		  }

		  /**
		   * Produces the next value and advances the internal state.
		   *
		   * @return the generated index converted to T
		   */
		  T next() {
		    if (this->mLastN != this->n) {
		      if (this->mTheta > 0. && this->mTheta < 1.) {
		        this->mZetan =
		            zeta(this->mLastN, this->mZetan, this->n, this->mTheta);
		        this->mEta =
		            (1. - powApprox(2. / static_cast<double>(this->n),
		                            1. - this->mTheta)) /
		            (1. - zeta(0, 0., 2, this->mTheta) / this->mZetan);
		      }
		      this->mLastN = this->n;
		      this->mDblN = static_cast<double>(this->n);
		    }

		    if (this->mTheta == -1.) {
		      T v = static_cast<T>(this->mRandState);
		      if (++this->mRandState >= this->n) {
		        this->mRandState = 0;
		      }
		      return v;
		    }

		    if (this->mTheta == 0.) {
		      double u = fastRandD(&this->mRandState);
		      return static_cast<T>(this->mDblN * u);
		    }

		    if (this->mTheta >= 40.) {
		      return 0;
		    }

		    // from J. Gray et al. Quickly generating billion-record synthetic
		    // databases. In SIGMOD, 1994.
		    // double u = erand48(this->rand_state);
		    double u = fastRandD(&this->mRandState);
		    double uz = u * this->mZetan;
		    if (uz < 1.) {
		      return 0;
		    }

		    if (uz < this->mThres) {
		      return 1;
		    }

		    return static_cast<T>(
		        this->mDblN * powApprox(this->mEta * (u - 1.) + 1., this->mAlpha));
		  }
		};

cuCollections @ 0b672bbd

Original line number	Diff line number	Diff line
		Subproject commit 0b672bbde7c85a79df4d7ca5f82e15e5b4a57700

benchmark/tnl/tnl_cuda.cu

+8 −5

Original line number	Diff line number	Diff line
		#include "../_common/benchmark.hpp"

		#include "../../implementation/BPlusTree/src/BTreeContainer/Default.hpp"
		#include "../../implementation/src/BPlusTree/Default.hpp"
		#include "../../implementation/src/BTreeContainer/Default.hpp"

		#include <TNL/Assert.h>
		#include <TNL/Containers/Array.h>
		@@ -15,15 +16,16 @@ using namespace TNL::Containers;
		int main(int argc, char **argv) {
		using KeyType = uint32_t;
		using ValueType = uint32_t;
		const size_t KeyInf = UINT32_MAX;
		const size_t Order = 15;

		Benchmark::execute<Benchmark::Device::Cuda, KeyType>(
		"tnl::cuda", [](auto &timer, std::vector<KeyType> input) {
		using KeyArray = Containers::Array<ValueType, TNL::Devices::Cuda>;
		using ValueArray = Containers::Array<ValueType, TNL::Devices::Cuda>;
		using Tree = BTreeContainer<KeyType, ValueType, Order, KeyInf,
		TNL::Devices::Cuda>;
		using MaskArray = Containers::Array<bool, TNL::Devices::Cuda>;

		using Tree = BTreeContainer<KeyType, ValueType, Order,
		TNL::Devices::Cuda, BPlusTree>;

		Tree container(getContainerSize(input.size(), Order));

		@@ -37,9 +39,10 @@ int main(int argc, char **argv) {
		timer.stop("insert");

		ValueArray results(input.size());
		MaskArray mask(input.size());

		timer.start();
		container.find(keys, results);
		container.find(keys, results, mask);
		timer.stop("query");
		});