Commit 3df6db3e authored by Tat Dat Duong's avatar Tat Dat Duong
Browse files

chore: clean up all tests

parent 4ac1316f
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
#include "BPlusTree/Default.hpp"
#include "BLinkTree/Default.hpp"
#include "BTreeContainer/Default.hpp"
#include "TNL/Devices/Cuda.h"
#include "benchmark.hpp"
@@ -12,8 +12,7 @@ int main(int argc, char **argv) {
  Benchmark::execute<Benchmark::Device::Cuda, KeyType>(
      "btree_cuda_v7", [](auto &timer, auto input) {
        size_t size = getContainerSize(input.size(), Order);
        BTreeContainer<BNode, BNodeLatch, BPlusTree, KeyType, ValueType, Order, TNL::Devices::Cuda>
            container(size);
        BTreeContainer<KeyType, ValueType, Order, TNL::Devices::Cuda, BLinkTree> container(size);

        Containers::Array<KeyType, TNL::Devices::Cuda> keys(input);
        Containers::Array<ValueType, TNL::Devices::Cuda> values(input);
+2 −3
Original line number Diff line number Diff line
#include "BPlusTree/Default.hpp"
#include "BLinkTree/Default.hpp"
#include "BTreeContainer/Default.hpp"
#include "TNL/Devices/Cuda.h"
#include "benchmark.hpp"
@@ -12,8 +12,7 @@ int main(int argc, char **argv) {
  Benchmark::execute<Benchmark::Device::Host, KeyType>(
      "btree_host_v7", [](auto &timer, auto input) {
        size_t size = getContainerSize(input.size(), Order);
        BTreeContainer<BNode, BNodeLatch, BPlusTree, KeyType, ValueType, Order, TNL::Devices::Host>
            container(size);
        BTreeContainer<KeyType, ValueType, Order, TNL::Devices::Host, BLinkTree> container(size);

        Containers::Array<KeyType, TNL::Devices::Host> keys(input);
        Containers::Array<ValueType, TNL::Devices::Host> values(input);
+33 −0
Original line number Diff line number Diff line
#pragma once
#include <TNL/Assert.h>
#include <TNL/Cuda/CudaCallable.h>
#include <TNL/TypeTraits.h>
#include <cstddef>
#include <stdint.h>

template <typename KeyType, typename ValueType, size_t Order> struct BLinkNode {
  BLinkNode *volatile mSibling;

  KeyType mHighKey;
  uint8_t mHighKeyFlag;

  uint8_t mLeaf;
  uint8_t mSize;

  int mWriteLock = false;

  KeyType mKeys[Order];
  ValueType mValues[Order];
  BLinkNode *volatile mChildren[Order];

  __cuda_callable__ inline bool leaf() { return this->mLeaf; }

  __cuda_callable__ inline size_t size() { return this->mSize; }

  __cuda_callable__ inline size_t childSize() {
    if (this->mLeaf == false && this->mSize > 0) {
      return this->mSize + 1;
    }
    return this->mSize;
  }
};
 No newline at end of file
+431 −0
Original line number Diff line number Diff line
#pragma once
#include <TNL/Cuda/CudaCallable.h>

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <sstream>
#include <string>

#include <cooperative_groups.h>

#include "../Constants.hpp"
#include "../Utils/BumpAllocator.hpp"
#include "../Utils/Debugger.hpp"
#include "../Utils/Print.hpp"

#include "../BNodeLatch/Default.hpp"
#include "./BNodeOperations/Default.hpp"
#include "TNL/Assert.h"

template <typename KeyType, typename ValueType, size_t Order, typename Device> class BLinkTree;

template <typename KeyType, typename ValueType, size_t Order>
class BLinkTree<KeyType, ValueType, Order, Devices::Cuda> {
  using Device = Devices::Cuda;
  using BNode = BLinkNode<KeyType, ValueType, Order>;
  using Latch = BNodeLatch<BNode, Device>;
  using Allocator = BumpAllocator<BNode, Device>;
  using _Operations = BLinkOperations<KeyType, ValueType, Order, Device>;

  template <typename Warp>
  __device__ static inline BNode *scan(BNode *node, KeyType key, Warp &warp) {

    if (node->mHighKeyFlag && key >= node->mHighKey) {
      return node->mSibling;
    }

    size_t size = node->size();
    size_t childSize = node->childSize();
    size_t targetIndex = upperBoundWarp(node->mKeys, size, key, warp);

    if (targetIndex >= childSize) {
      return nullptr;
    }

    BNode *result = node->mChildren[targetIndex];
    return result;
  }

  template <typename Warp>
  __device__ static inline BNode *moveSide(BNode *cursor, KeyType key, Warp &warp) {
    BNode *tmp, *result = cursor;
    while (result != nullptr && result->mSibling != nullptr &&
           (tmp = scan(result, key, warp)) == result->mSibling) {
      result = tmp;
    }

    return result;
  }

  template <typename Warp>
  __device__ static inline BNode *findLeaf(BNode *root, KeyType key, Warp &warp) {
    BNode *cursor = root;
    while (cursor != nullptr && cursor->leaf() == false) {
      cursor = scan(cursor, key, warp);
    }

    cursor = moveSide(cursor, key, warp);
    return cursor;
  }

  template <typename Warp>
  __device__ static BNode *splitNode(BNode *cursor, Allocator &alloc, size_t siblingStart,
                                     size_t cursorCount, Warp &warp) {

    BNode *sibling = nullptr;
    auto rank = warp.thread_rank();

    if (rank == 0) {
      sibling = alloc.allocate();
    }

    _Operations::init(warp, sibling, cursor);
    warp.sync();

    sibling = warp.shfl(sibling, 0);

    size_t size = cursor->size();
    size_t childSize = cursor->childSize();
    bool isLeaf = sibling->leaf();

    if (rank >= siblingStart) {
      if (rank < childSize) {
        sibling->mChildren[rank - siblingStart] = cursor->mChildren[rank];
      }

      if (rank < size) {
        sibling->mKeys[rank - siblingStart] = cursor->mKeys[rank];
        sibling->mValues[rank - siblingStart] = cursor->mValues[rank];
        sibling->mSize = size - siblingStart;
      }
    }

    cursor->mSize = cursorCount;
    cursor->mSibling = sibling;
    // wait until all sibling have written
    // warp.sync();
    cursor->mHighKey = sibling->mKeys[0];
    cursor->mHighKeyFlag = true;

    if (warp.thread_rank() == 0) {
      DBG_SPLIT(cursor, siblingStart, cursorCount, sibling);
    }

    return sibling;
  }

  template <typename Warp>
  __device__ static inline void insertIntoFreeNode(BNode *curr, KeyType insertKey, ValueType value,
                                                   BNode *insertNode, Warp &warp) {
    auto rank = warp.thread_rank();
    size_t keyIdx = upperBoundWarp(curr->mKeys, curr->size(), insertKey, warp);

    bool isLeaf = curr->leaf();
    size_t childSize = curr->childSize();

    if (isLeaf) {
      if (rank == 0) {
        DBG_INSERT_LEAF(curr, insertKey, keyIdx);
      }
      _Operations::insertChild(curr, keyIdx, insertNode, childSize, warp);
      _Operations::insertKey(curr, keyIdx, insertKey, value, warp);
    } else {
      if (rank == 0) {
        DBG_INSERT_INTERNAL(curr, insertKey, insertNode, keyIdx, (keyIdx + 1));
      }
      _Operations::insertChild(curr, keyIdx + 1, insertNode, childSize, warp);
      _Operations::insertKey(curr, keyIdx, insertKey, warp);
    }

    warp.sync();
  }

  template <typename Warp>
  __device__ static inline BNode *increaseTreeHeight(BNode *leftNode, KeyType insertKey,
                                                     BNode *rightNode, Allocator &alloc,
                                                     Warp &warp) {

    BNode *rootNode = nullptr;
    auto rank = warp.thread_rank();
    if (rank == 0) {
      rootNode = alloc.allocate();
    }
    rootNode = warp.shfl(rootNode, 0);

    _Operations::init(warp, rootNode, false, nullptr, false);
    warp.sync();

    if (rank == 0) {
      rootNode->mKeys[rank] = insertKey;
      rootNode->mChildren[rank] = leftNode;
    }
    if (rank == 1) {
      rootNode->mChildren[rank] = rightNode;
    }
    rootNode->mSize = 1;

    if (rank == 0) {
      DBG_INCREASE_HEIGHT(leftNode, rootNode, rightNode, insertKey);
    }

    return rootNode;
  }

  template <typename Warp>
  __device__ static inline bool attemptLatchWarp(BNode *node, Latch &latch, Warp &warp) {
    bool attempt = false;

    if (warp.thread_rank() == 0) {
      attempt = latch.attempt(node);
    }

    attempt = warp.any(attempt);
    return attempt;
  }

  template <typename Warp>
  __device__ static inline void releaseLatchWarp(BNode *node, Latch &latch, Warp &warp) {
    if (warp.thread_rank() == 0) {
      latch.release(node);
    }

    warp.sync();
  }

  template <typename Warp>
  __device__ static inline bool verifyParent(BNode *curr, BNode *prev, Warp &warp) {

    auto rank = warp.thread_rank();
    size_t size = prev->childSize();

    bool found = false;
    if (rank < size) {
      found = prev->mChildren[rank] == curr;
    }

    return warp.any(found);
  }

  template <typename Warp>
  __device__ static inline size_t lowerBoundWarp(KeyType *haystack, size_t haystackSize,
                                                 KeyType needle, Warp &warp) {
    auto rank = warp.thread_rank();
    TNL_ASSERT_GE(warp.size(), Order, "Warp must be larger than order");

    bool matches = false;
    if (rank < haystackSize) {
      matches = haystack[rank] >= needle;
    }

    auto ballot = warp.ballot(matches);
    size_t res = __ffs(ballot) - 1;
    if (res > haystackSize)
      return haystackSize;
    return res;
  }

  template <typename Warp>
  __device__ static inline size_t upperBoundWarp(KeyType *haystack, size_t haystackSize,
                                                 KeyType needle, Warp &warp) {
    auto rank = warp.thread_rank();
    TNL_ASSERT_GE(warp.size(), Order, "Warp must be larger than order");

    bool matches = false;
    if (rank < haystackSize) {
      matches = haystack[rank] > needle;
    }

    auto ballot = warp.ballot(matches);
    size_t res = __ffs(ballot) - 1;
    if (res > haystackSize)
      return haystackSize;
    return res;
  }

public:
  using Node = BNode;
  using Operations = _Operations;

  template <typename Warp> __device__ static inline BNode *init(Allocator &alloc, Warp &warp) {
    BNode *root = alloc.allocate();
    _Operations::init(warp, root, true, nullptr, false);

    DBG_INIT(root, Order);
    return root;
  }

  template <typename Warp>
  __device__ static inline bool find(BNode *root, KeyType key, ValueType &result, Warp &warp) {
    auto rank = warp.thread_rank();

    BNode *next = root;
    while (next) {
      bool isLeaf = next->leaf();
      size_t size = next->size();

      KeyType nextKey = next->mKeys[rank];
      uint32_t ballot = warp.ballot(nextKey >= key);
      size_t targetIdx = __ffs(ballot) - 1;

      targetIdx = (targetIdx > size) ? size : targetIdx;
      KeyType targetKey = warp.shfl(nextKey, targetIdx);

      if (isLeaf) {
        if (targetIdx < size && targetKey == key) {
          result = next->mValues[targetIdx];
          return true;
        }
        return false;
      }

      // assumption: the tree is fully constructed and insert / splits are done
      next = next->mChildren[targetIdx + (targetKey == key)];
    }

    return false;
  }

  template <typename Warp>
  __device__ static bool insert(BNode *&root, KeyType key, ValueType value, Allocator &alloc,
                                Latch &latch, Warp &warp) {
    BNode *prev = nullptr, *curr = nullptr;

    do {
      __threadfence();

      BNode *tmpPrev = prev;
      if (curr == nullptr) {
        curr = root;
      } else {
        prev = curr;
        curr = scan(curr, key, warp);
      }

      if (curr != nullptr && curr->size() + !curr->leaf() >= Order) {
        // if node is full, we haven't propagated, restart the operation
        if (prev && curr != root && prev->mSibling == curr) {
          return false;
        }

        if (attemptLatchWarp(curr, latch, warp) == false) {
          return false;
        }

        size_t medianIdx = curr->size() / 2;
        size_t splitIdx = medianIdx + (!curr->leaf());
        KeyType medianKey = curr->mKeys[medianIdx];

        if (curr == root) {
          BNode *split = splitNode(curr, alloc, splitIdx, medianIdx, warp);
          __threadfence();

          BNode *newRoot = increaseTreeHeight(curr, medianKey, split, alloc, warp);

          __threadfence();
          releaseLatchWarp(curr, latch, warp);

          root = newRoot;
          curr = prev;
          prev = tmpPrev;
        } else {
          // latch the parent too
          if (attemptLatchWarp(prev, latch, warp) == false) {
            releaseLatchWarp(curr, latch, warp);
            return false;
          }

          if (prev && verifyParent(curr, prev, warp) == false) {
            releaseLatchWarp(curr, latch, warp);
            releaseLatchWarp(prev, latch, warp);
            return false;
          }

          // verify, if the parent has not subsequently went full (esp. when )
          if (prev->size() + !prev->leaf() >= Order) {
            releaseLatchWarp(curr, latch, warp);
            releaseLatchWarp(prev, latch, warp);
            return false;
          }

          BNode *split = splitNode(curr, alloc, splitIdx, medianIdx, warp);

          __threadfence();
          releaseLatchWarp(curr, latch, warp);

          insertIntoFreeNode(prev, medianKey, 0, split, warp);

          __threadfence();
          releaseLatchWarp(prev, latch, warp);

          curr = prev;
          prev = tmpPrev;
        }
      }
    } while (curr != nullptr && curr->leaf() == false);

    if (curr == nullptr) {
      return false;
    }

    if (attemptLatchWarp(curr, latch, warp) == false) {
      return false;
    }

    BNode *tmp;
    while (curr->mSibling != nullptr && (tmp = scan(curr, key, warp)) == curr->mSibling) {
      bool attempt = attemptLatchWarp(tmp, latch, warp);
      releaseLatchWarp(curr, latch, warp);

      if (attempt == false) {
        return false;
      }
      curr = tmp;
    }

    // here we don't check if the node has subsequently become full
    if (curr->size() + !curr->leaf() >= Order) {
      releaseLatchWarp(curr, latch, warp);
      return false;
    }

    insertIntoFreeNode(curr, key, value, nullptr, warp);

    __threadfence();
    releaseLatchWarp(curr, latch, warp);
    return true;
  }

  template <typename Warp>
  __device__ static inline bool remove(BNode *root, KeyType key, Latch &latch, Warp &warp) {
    BNode *cursor = findLeaf(root, key, warp);

    if (cursor == nullptr) {
      return true;
    }

    if (attemptLatchWarp(cursor, latch, warp) == false) {
      return false;
    }

    __threadfence();

    // remove item from keys and remove the respective node
    size_t keyIdx = lowerBoundWarp(cursor->mKeys, cursor->size(), key, warp);
    if (keyIdx >= cursor->size() || cursor->mKeys[keyIdx] != key) {
      releaseLatchWarp(cursor, latch, warp);
      return true;
    }

    _Operations::removeChild(cursor, keyIdx, cursor->childSize(), warp);
    _Operations::removeKey(cursor, keyIdx, warp);
    warp.sync();

    if (warp.thread_rank() == 0) {
      DBG_DELETE(cursor, keyIdx);
    }

    releaseLatchWarp(cursor, latch, warp);
    return true;
  }
};
+229 −0
Original line number Diff line number Diff line
#pragma once
#define BTREE_HOST 1

#include <TNL/Cuda/CudaCallable.h>

#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>

#include "../Constants.hpp"
#include "../Utils/Algorithms.hpp"
#include "../Utils/BumpAllocator.hpp"
#include "../Utils/Print.hpp"

#include "../BNodeLatch/Default.hpp"
#include "./BLinkNode.hpp"
#include "./BNodeOperations/Default.hpp"

template <typename KeyType, typename ValueType, size_t Order, typename Device> class BLinkTree;

template <typename KeyType, typename ValueType, size_t Order>
class BLinkTree<KeyType, ValueType, Order, Devices::Host> {
  using Device = Devices::Host;
  using BNode = BLinkNode<KeyType, ValueType, Order>;
  using Latch = BNodeLatch<BNode, Device>;
  using Allocator = BumpAllocator<BNode, Device>;
  using _Operations = BLinkOperations<KeyType, ValueType, Order, Device>;

  __cuda_callable__ static inline BNode *scan(BNode *node, KeyType key) {
    if (node->mHighKeyFlag && key >= node->mHighKey) {
      return node->mSibling;
    }

    size_t size = node->size();
    size_t childSize = node->childSize();
    size_t targetIndex = upperBound(node->mKeys, size, key);

    if (targetIndex == node->childSize()) {
      return nullptr;
    }

    BNode *result = node->mChildren[targetIndex];
    return result;
  }

  __cuda_callable__ static inline BNode *moveSide(BNode *cursor, KeyType key) {
    BNode *tmp, *result = cursor;
    while (result != nullptr && result->mSibling != nullptr &&
           (tmp = scan(result, key)) == result->mSibling) {
      result = tmp;
    }

    return result;
  }

  __cuda_callable__ static inline BNode *findLeaf(BNode *root, KeyType key) {
    BNode *cursor = root;
    while (cursor != nullptr && cursor->leaf() == false) {
      cursor = scan(cursor, key);
    }

    cursor = moveSide(cursor, key);
    return cursor;
  }

  __cuda_callable__ static BNode *splitNode(BNode *cursor, Allocator &alloc, size_t siblingStart,
                                            size_t cursorCount) {

    BNode *sibling = alloc.allocate();
    _Operations::init(sibling, cursor);

    for (size_t i = siblingStart; i < cursor->childSize(); ++i) {
      BNode *node = cursor->mChildren[i];
      _Operations::appendChild(sibling, node, i - siblingStart);
    }

    for (size_t i = siblingStart; i < cursor->size(); ++i) {
      if (sibling->leaf()) {
        _Operations::appendKey(sibling, cursor->mKeys[i], cursor->mValues[i]);
      } else {
        _Operations::appendKey(sibling, cursor->mKeys[i]);
      }
    }

    cursor->mSize = cursorCount;
    cursor->mSibling = sibling;
    cursor->mHighKey = sibling->mKeys[0];
    cursor->mHighKeyFlag = true;

    return sibling;
  }

  __cuda_callable__ static inline void insertIntoFreeNode(BNode *curr, KeyType insertKey,
                                                          ValueType value, BNode *insertNode) {
    size_t keyIdx = upperBound(curr->mKeys, curr->size(), insertKey);

    if (curr->leaf()) {
      _Operations::insertChild(curr, keyIdx, insertNode, curr->childSize());
      _Operations::insertKey(curr, keyIdx, insertKey, value);
    } else {
      _Operations::insertChild(curr, keyIdx + 1, insertNode, curr->childSize());
      _Operations::insertKey(curr, keyIdx, insertKey);
    }
  }

  __cuda_callable__ static inline void increaseTreeHeight(BNode *curr, KeyType insertKey,
                                                          BNode *insertNode, Allocator &alloc) {

    BNode *leftNode = alloc.allocate();

    // copy the entire node to the element
    _Operations::init(leftNode, curr);
    for (size_t childKey = 0; childKey < curr->childSize(); ++childKey) {
      BNode *child = curr->mChildren[childKey];
      _Operations::appendChild(leftNode, child, childKey);

      if (childKey < curr->size()) {
        if (leftNode->leaf()) {
          _Operations::appendKey(leftNode, curr->mKeys[childKey], curr->mValues[childKey]);
        } else {
          _Operations::appendKey(leftNode, curr->mKeys[childKey]);
        }
      }
    }

    // this might cause some issues in synchronization
    _Operations::init(curr, false, nullptr, false);
    _Operations::appendChild(curr, leftNode, 0);
    _Operations::appendKey(curr, insertKey);
    _Operations::appendChild(curr, insertNode, 1);
  }

public:
  using Node = BNode;
  using Operations = _Operations;

  __cuda_callable__ static inline BNode *init(Allocator &alloc) {
    BNode *root = alloc.allocate();
    _Operations::init(root, true, nullptr, false);
    return root;
  }

  __cuda_callable__ static inline bool find(BNode *root, KeyType key, ValueType &result) {
    BNode *leaf = findLeaf(root, key);
    if (leaf == nullptr)
      return false;

    size_t it = lowerBound(leaf->mKeys, leaf->size(), key);
    if (it < leaf->size() && leaf->mKeys[it] == key) {
      result = leaf->mValues[it];
      return true;
    }
    return false;
  }

  __cuda_callable__ static bool insert(BNode *root, KeyType key, ValueType value, Allocator &alloc,
                                       Latch &latch) {
    BNode *prev = nullptr, *curr = nullptr;
    do {
      BNode *tmpPrev = prev;
      if (curr == nullptr) {
        curr = root;
      } else {
        prev = curr;
        curr = scan(curr, key);
      }

      if (curr != nullptr && curr->size() + !curr->leaf() >= Order) {
        // if node is full, we haven't propagated, restart the operation
        if (curr->mSibling == prev && curr != root) {
          return false;
        }

        size_t medianIdx = curr->size() / 2;
        size_t splitIdx = medianIdx + (!curr->leaf());

        KeyType medianKey = curr->mKeys[medianIdx];
        BNode *split = nullptr;

        split = splitNode(curr, alloc, splitIdx, medianIdx);

        if (curr == root) {
          increaseTreeHeight(curr, medianKey, split, alloc);
        } else {
          insertIntoFreeNode(prev, medianKey, 0, split);
          curr = prev;
          prev = tmpPrev;
        }
      }
    } while (curr != nullptr && curr->leaf() == false);

    if (curr == nullptr) {
      return false;
    }

    BNode *tmp;
    while (curr->mSibling != nullptr && (tmp = scan(curr, key)) == curr->mSibling) {
      curr = tmp;
    }

    insertIntoFreeNode(curr, key, value, nullptr);
    return true;
  }

  __cuda_callable__ static inline bool remove(BNode *root, KeyType key, Latch &latch) {
    BNode *cursor = findLeaf(root, key);

    if (cursor == nullptr)
      return true;

    if (!latch.attempt(cursor)) {
      return false;
    }

    // remove item from keys and remove the respective node
    size_t keyIdx = lowerBound(cursor->mKeys, cursor->size(), key);
    if (keyIdx == cursor->size() || cursor->mKeys[keyIdx] != key) {
      latch.release(cursor);
      return true;
    }

    _Operations::removeChild(cursor, keyIdx, cursor->childSize());
    _Operations::removeKey(cursor, keyIdx);

    latch.release(cursor);
    return true;
  }
};
 No newline at end of file
Loading