Loading HashGraph/HashGraphV2/HashGraphV2.h +22 −9 Original line number Diff line number Diff line #pragma once #include "../../HashFunction.h" #include "HashGraphV2View.h" #include <TNL/Containers/Array.h> Loading @@ -10,21 +11,33 @@ using namespace TNL::Containers; template<typename Item, typename Key> template<typename Item, typename Key, typename Device> class HashGraphV2 { using Self = HashGraphV2<Item, Key, Device>; using ViewType = HashGraphV2View<Item, Key, Device>; friend ViewType::HashGraphV2View(Self*); public: int duplicates() const; void debug_print() const { for (int i = 0; i < m_content.getSize(); i++) std::cout << m_content.getElement(i).key << ' '; std::cout << std::endl; std::cout << m_items << std::endl; std::cout << m_offset << std::endl; } protected: HashGraphV2(const Array<Item>& items, const Item& defItem); HashGraphV2(const Array<Item, Device>& items); ~HashGraphV2(); bool find(const Key& key, Item* item = nullptr) const; const Item m_default; bool find(const Key& key, ArrayView<Item, Device> item) const; private: void fill_offset(const Array<int>& counter, Array<int>& offset); void fill_offset(const Array<int, Device>& counter, Array<int, Device>& offset); Array<Item> m_items; Array<int> m_offset; HashFunction<Key> m_hash; Array<Item, Device> m_content; Array<int, Device> m_items; Array<int, Device> m_offset; ViewType* m_view; }; Loading HashGraph/HashGraphV2/HashGraphV2.hpp +28 −29 Original line number Diff line number Diff line #pragma once #include "HashGraphV2.h" template<typename Item, typename Key> HashGraphV2<Item, Key>::HashGraphV2(const Array<Item>& items, const Item& defItem) : m_default(defItem), m_hash(items.getSize()) { template<typename Item, typename Key, typename Device> HashGraphV2<Item, Key, Device>::HashGraphV2(const Array<Item, Device>& items) : m_content(items), m_items(items.getSize()), m_offset((1 << int(ceil(log2(m_content.getSize())))) + 1), m_view(new ViewType(this)) { /*int nBins = items.getSize() / 16; int binSize = items.getSize() / nBins + (items.getSize() % nBins > 0); int tableSize = nBins * binSize; Loading Loading @@ -48,33 +52,28 @@ HashGraphV2<Item, Key>::HashGraphV2(const Array<Item>& items, const Item& defIte items[i].~Item();*/ } template<typename Item, typename Key> HashGraphV2<Item, Key>::~HashGraphV2() { for (int i = 0; i < m_items.getSize(); i++) m_items[i].~Item(); template<typename Item, typename Key, typename Device> HashGraphV2<Item, Key, Device>::~HashGraphV2() { delete m_view; } template<typename Item, typename Key> bool HashGraphV2<Item, Key>::find(const Key& key, Item* item) const { int hash = m_hash(key); int end = m_offset[hash + 1]; for (int i = m_offset[hash]; i < end; i++) { if (m_items[i].key() == key) { if (item) *item = m_items[i]; return true; } } return false; template<typename Item, typename Key, typename Device> bool HashGraphV2<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const { return m_view->find(key, item); } template<typename Item, typename Key> void HashGraphV2<Item, Key>::fill_offset(const Array<int>& counter, Array<int>& offset) { int latestOffset = 0; offset.setSize(counter.getSize() + 1); for (int i = 0; i < counter.getSize(); i++) { offset[i] = latestOffset; latestOffset += counter[i]; } offset[counter.getSize()] = latestOffset; template<typename Item, typename Key, typename Device> int HashGraphV2<Item, Key, Device>::duplicates() const { return -1; } // template<typename Item, typename Key> // void HashGraphV2<Item, Key, Device>::fill_offset(const Array<int>& counter, Array<int>& offset) { // int latestOffset = 0; // offset.setSize(counter.getSize() + 1); // for (int i = 0; i < counter.getSize(); i++) { // offset[i] = latestOffset; // latestOffset += counter[i]; // } // offset[counter.getSize()] = latestOffset; // } HashGraph/HashGraphV2/HashGraphV2View.h 0 → 100644 +30 −0 Original line number Diff line number Diff line #pragma once #include "HashGraphV2.h" #include <TNL/Containers/ArrayView.h> using namespace TNL::Containers; template<typename Item, typename Key, typename Device> class HashGraphV2; template<typename Item, typename Key, typename Device> class HashGraphV2View { using TableType = HashGraphV2<Item, Key, Device>; public: void build(); HashGraphV2View(TableType* table); ~HashGraphV2View(); bool find(const Key& key, ArrayView<Item, Device> item) const; void fill_offset(const ArrayView<int, Device>& counter, ArrayView<int, Device> offset); private: ArrayView<Item, Device> m_content; ArrayView<int, Device> m_items; ArrayView<int, Device> m_offset; HashFunction<Key> m_hash; }; #include "HashGraphV2View.hpp" HashGraph/HashGraphV2/HashGraphV2View.hpp 0 → 100644 +134 −0 Original line number Diff line number Diff line #pragma once #include "HashGraphV2View.h" #include <TNL/Algorithms/ParallelFor.h> template<typename Item, typename Key, typename Device> HashGraphV2View<Item, Key, Device>::HashGraphV2View(TableType* table) : m_content(table->m_content.getView()), m_items(table->m_items.getView()), m_offset(table->m_offset.getView()), m_hash(31, 10538, m_content.getSize()) { build(); } template<typename Item, typename Key, typename Device> void HashGraphV2View<Item, Key, Device>::build() { // Phase 1 int nBins = int(ceil(double(m_content.getSize()) / 16)); int binSize = int(ceil(double(m_content.getSize()) / nBins)); int tableSize = nBins * binSize; Array<int, Device> bcArr(nBins, 0); auto binCounter = bcArr.getView(); auto content = m_content; auto hash = m_hash; Array<int, Device> hArr(m_content.getSize()); auto hashes = hArr.getView(); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, hash, content] __cuda_callable__ (int i) mutable { hashes[i] = hash(content[i].key); }); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, binSize, binCounter] __cuda_callable__ (int i) mutable { int bin = hashes[i] / binSize; #ifdef __CUDA_ARCH__ atomicAdd(&binCounter[bin], 1); #else binCounter[bin]++; #endif }); // Phase 2 Array<int, Device> boArr((1 << int(ceil(log2(nBins)))) + 1, 0); auto binOffset = boArr.getView(); fill_offset(binCounter, binOffset); binCounter.setValue(0); Array<int, Device> reArr(m_content.getSize()); auto reorg = reArr.getView(); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, binSize, binCounter, binOffset, reorg] __cuda_callable__ (int i) mutable { int bin = hashes[i] / binSize; #ifdef __CUDA_ARCH__ int pos = atomicAdd(&binCounter[bin], 1) + binOffset[bin]; atomicExch(&reorg[pos], i); #else int pos = binCounter[bin]++ + binOffset[bin]; reorg[pos] = i; #endif }); // Phase 3 Array<int, Device> cnArr(m_content.getSize(), 0); auto counter = cnArr.getView(); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, reorg, counter] __cuda_callable__ (int i) mutable { int pos = hashes[reorg[i]]; #ifdef __CUDA_ARCH__ atomicAdd(&counter[pos], 1); #else counter[pos]++; #endif }); Array<int, Device> ofArr(); auto offset = m_offset; fill_offset(counter, offset); counter.setValue(0); auto items = m_items; TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [counter, hashes, items, offset] __cuda_callable__ (int i) mutable { #ifdef __CUDA_ARCH__ int pos = offset[hashes[i]] + atomicAdd(&counter[hashes[i]], 1); atomicExch(&items[pos], i); #else int pos = offset[hashes[i]] + counter[hashes[i]]++; items[pos] = i; #endif }); } template<typename Item, typename Key, typename Device> HashGraphV2View<Item, Key, Device>::~HashGraphV2View() {} template<typename Item, typename Key, typename Device> bool HashGraphV2View<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const { int hash = m_hash(key); int end = m_offset.getElement(hash + 1); auto content = m_content; Array<int, Device> result(1); auto rView = result.getView(); rView.setValue(-1); auto items = m_items; auto _find = [content, items, item, rView, key] __cuda_callable__ (int i) mutable { if (content[items[i]].key == key) { rView[0] = items[i]; item[0] = content[items[i]]; } }; TNL::Algorithms::ParallelFor<Device>::exec(m_offset.getElement(hash), end, _find); return rView.getElement(0) > -1; } template<typename Item, typename Key, typename Device> void HashGraphV2View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter, ArrayView<int, Device> offset) { auto fill = [offset, counter] __cuda_callable__ (int i) mutable { offset[i] = i >= counter.getSize() ? 0 : counter[i]; }; TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), fill); auto reduce = [offset] __cuda_callable__ (int k, int d) mutable { if (k % (1 << (d + 1)) == 0) offset[k + (1 << (d + 1)) - 1] = offset[k + (1 << d) - 1] + offset[k + (1 << (d + 1)) - 1]; }; for (int d = 0; d < log2(offset.getSize() - 1); d++) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize() - 1, reduce, d); offset.setElement(offset.getSize() - 1, offset.getElement(offset.getSize() - 2)); offset.setElement(offset.getSize() - 2, 0); auto up_sweep = [offset] __cuda_callable__ (int r, int d) mutable { if ((offset.getSize() - 2 - r) % (1 << d) == 0) { int l = r - (1 << (d - 1)); int t = offset[r]; offset[r] += offset[l]; offset[l] = t; } }; for (int d = log2(offset.getSize() - 1); d > 0; d--) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize() - 1, up_sweep, d); } Makefile +2 −0 Original line number Diff line number Diff line Loading @@ -37,6 +37,8 @@ HEADERS := CuckooHash/CuckooHashMap.h\ HashGraph/HashGraphV1/HashGraphV1View.hpp\ HashGraph/HashGraphV2/HashGraphV2.h\ HashGraph/HashGraphV2/HashGraphV2.hpp\ HashGraph/HashGraphV2/HashGraphV2View.h\ HashGraph/HashGraphV2/HashGraphV2View.hpp\ HashFunction.h\ HashFunction.hpp\ Pair.hpp\ Loading Loading
HashGraph/HashGraphV2/HashGraphV2.h +22 −9 Original line number Diff line number Diff line #pragma once #include "../../HashFunction.h" #include "HashGraphV2View.h" #include <TNL/Containers/Array.h> Loading @@ -10,21 +11,33 @@ using namespace TNL::Containers; template<typename Item, typename Key> template<typename Item, typename Key, typename Device> class HashGraphV2 { using Self = HashGraphV2<Item, Key, Device>; using ViewType = HashGraphV2View<Item, Key, Device>; friend ViewType::HashGraphV2View(Self*); public: int duplicates() const; void debug_print() const { for (int i = 0; i < m_content.getSize(); i++) std::cout << m_content.getElement(i).key << ' '; std::cout << std::endl; std::cout << m_items << std::endl; std::cout << m_offset << std::endl; } protected: HashGraphV2(const Array<Item>& items, const Item& defItem); HashGraphV2(const Array<Item, Device>& items); ~HashGraphV2(); bool find(const Key& key, Item* item = nullptr) const; const Item m_default; bool find(const Key& key, ArrayView<Item, Device> item) const; private: void fill_offset(const Array<int>& counter, Array<int>& offset); void fill_offset(const Array<int, Device>& counter, Array<int, Device>& offset); Array<Item> m_items; Array<int> m_offset; HashFunction<Key> m_hash; Array<Item, Device> m_content; Array<int, Device> m_items; Array<int, Device> m_offset; ViewType* m_view; }; Loading
HashGraph/HashGraphV2/HashGraphV2.hpp +28 −29 Original line number Diff line number Diff line #pragma once #include "HashGraphV2.h" template<typename Item, typename Key> HashGraphV2<Item, Key>::HashGraphV2(const Array<Item>& items, const Item& defItem) : m_default(defItem), m_hash(items.getSize()) { template<typename Item, typename Key, typename Device> HashGraphV2<Item, Key, Device>::HashGraphV2(const Array<Item, Device>& items) : m_content(items), m_items(items.getSize()), m_offset((1 << int(ceil(log2(m_content.getSize())))) + 1), m_view(new ViewType(this)) { /*int nBins = items.getSize() / 16; int binSize = items.getSize() / nBins + (items.getSize() % nBins > 0); int tableSize = nBins * binSize; Loading Loading @@ -48,33 +52,28 @@ HashGraphV2<Item, Key>::HashGraphV2(const Array<Item>& items, const Item& defIte items[i].~Item();*/ } template<typename Item, typename Key> HashGraphV2<Item, Key>::~HashGraphV2() { for (int i = 0; i < m_items.getSize(); i++) m_items[i].~Item(); template<typename Item, typename Key, typename Device> HashGraphV2<Item, Key, Device>::~HashGraphV2() { delete m_view; } template<typename Item, typename Key> bool HashGraphV2<Item, Key>::find(const Key& key, Item* item) const { int hash = m_hash(key); int end = m_offset[hash + 1]; for (int i = m_offset[hash]; i < end; i++) { if (m_items[i].key() == key) { if (item) *item = m_items[i]; return true; } } return false; template<typename Item, typename Key, typename Device> bool HashGraphV2<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const { return m_view->find(key, item); } template<typename Item, typename Key> void HashGraphV2<Item, Key>::fill_offset(const Array<int>& counter, Array<int>& offset) { int latestOffset = 0; offset.setSize(counter.getSize() + 1); for (int i = 0; i < counter.getSize(); i++) { offset[i] = latestOffset; latestOffset += counter[i]; } offset[counter.getSize()] = latestOffset; template<typename Item, typename Key, typename Device> int HashGraphV2<Item, Key, Device>::duplicates() const { return -1; } // template<typename Item, typename Key> // void HashGraphV2<Item, Key, Device>::fill_offset(const Array<int>& counter, Array<int>& offset) { // int latestOffset = 0; // offset.setSize(counter.getSize() + 1); // for (int i = 0; i < counter.getSize(); i++) { // offset[i] = latestOffset; // latestOffset += counter[i]; // } // offset[counter.getSize()] = latestOffset; // }
HashGraph/HashGraphV2/HashGraphV2View.h 0 → 100644 +30 −0 Original line number Diff line number Diff line #pragma once #include "HashGraphV2.h" #include <TNL/Containers/ArrayView.h> using namespace TNL::Containers; template<typename Item, typename Key, typename Device> class HashGraphV2; template<typename Item, typename Key, typename Device> class HashGraphV2View { using TableType = HashGraphV2<Item, Key, Device>; public: void build(); HashGraphV2View(TableType* table); ~HashGraphV2View(); bool find(const Key& key, ArrayView<Item, Device> item) const; void fill_offset(const ArrayView<int, Device>& counter, ArrayView<int, Device> offset); private: ArrayView<Item, Device> m_content; ArrayView<int, Device> m_items; ArrayView<int, Device> m_offset; HashFunction<Key> m_hash; }; #include "HashGraphV2View.hpp"
HashGraph/HashGraphV2/HashGraphV2View.hpp 0 → 100644 +134 −0 Original line number Diff line number Diff line #pragma once #include "HashGraphV2View.h" #include <TNL/Algorithms/ParallelFor.h> template<typename Item, typename Key, typename Device> HashGraphV2View<Item, Key, Device>::HashGraphV2View(TableType* table) : m_content(table->m_content.getView()), m_items(table->m_items.getView()), m_offset(table->m_offset.getView()), m_hash(31, 10538, m_content.getSize()) { build(); } template<typename Item, typename Key, typename Device> void HashGraphV2View<Item, Key, Device>::build() { // Phase 1 int nBins = int(ceil(double(m_content.getSize()) / 16)); int binSize = int(ceil(double(m_content.getSize()) / nBins)); int tableSize = nBins * binSize; Array<int, Device> bcArr(nBins, 0); auto binCounter = bcArr.getView(); auto content = m_content; auto hash = m_hash; Array<int, Device> hArr(m_content.getSize()); auto hashes = hArr.getView(); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, hash, content] __cuda_callable__ (int i) mutable { hashes[i] = hash(content[i].key); }); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, binSize, binCounter] __cuda_callable__ (int i) mutable { int bin = hashes[i] / binSize; #ifdef __CUDA_ARCH__ atomicAdd(&binCounter[bin], 1); #else binCounter[bin]++; #endif }); // Phase 2 Array<int, Device> boArr((1 << int(ceil(log2(nBins)))) + 1, 0); auto binOffset = boArr.getView(); fill_offset(binCounter, binOffset); binCounter.setValue(0); Array<int, Device> reArr(m_content.getSize()); auto reorg = reArr.getView(); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, binSize, binCounter, binOffset, reorg] __cuda_callable__ (int i) mutable { int bin = hashes[i] / binSize; #ifdef __CUDA_ARCH__ int pos = atomicAdd(&binCounter[bin], 1) + binOffset[bin]; atomicExch(&reorg[pos], i); #else int pos = binCounter[bin]++ + binOffset[bin]; reorg[pos] = i; #endif }); // Phase 3 Array<int, Device> cnArr(m_content.getSize(), 0); auto counter = cnArr.getView(); TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [hashes, reorg, counter] __cuda_callable__ (int i) mutable { int pos = hashes[reorg[i]]; #ifdef __CUDA_ARCH__ atomicAdd(&counter[pos], 1); #else counter[pos]++; #endif }); Array<int, Device> ofArr(); auto offset = m_offset; fill_offset(counter, offset); counter.setValue(0); auto items = m_items; TNL::Algorithms::ParallelFor<Device>::exec(0, m_content.getSize(), [counter, hashes, items, offset] __cuda_callable__ (int i) mutable { #ifdef __CUDA_ARCH__ int pos = offset[hashes[i]] + atomicAdd(&counter[hashes[i]], 1); atomicExch(&items[pos], i); #else int pos = offset[hashes[i]] + counter[hashes[i]]++; items[pos] = i; #endif }); } template<typename Item, typename Key, typename Device> HashGraphV2View<Item, Key, Device>::~HashGraphV2View() {} template<typename Item, typename Key, typename Device> bool HashGraphV2View<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const { int hash = m_hash(key); int end = m_offset.getElement(hash + 1); auto content = m_content; Array<int, Device> result(1); auto rView = result.getView(); rView.setValue(-1); auto items = m_items; auto _find = [content, items, item, rView, key] __cuda_callable__ (int i) mutable { if (content[items[i]].key == key) { rView[0] = items[i]; item[0] = content[items[i]]; } }; TNL::Algorithms::ParallelFor<Device>::exec(m_offset.getElement(hash), end, _find); return rView.getElement(0) > -1; } template<typename Item, typename Key, typename Device> void HashGraphV2View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter, ArrayView<int, Device> offset) { auto fill = [offset, counter] __cuda_callable__ (int i) mutable { offset[i] = i >= counter.getSize() ? 0 : counter[i]; }; TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), fill); auto reduce = [offset] __cuda_callable__ (int k, int d) mutable { if (k % (1 << (d + 1)) == 0) offset[k + (1 << (d + 1)) - 1] = offset[k + (1 << d) - 1] + offset[k + (1 << (d + 1)) - 1]; }; for (int d = 0; d < log2(offset.getSize() - 1); d++) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize() - 1, reduce, d); offset.setElement(offset.getSize() - 1, offset.getElement(offset.getSize() - 2)); offset.setElement(offset.getSize() - 2, 0); auto up_sweep = [offset] __cuda_callable__ (int r, int d) mutable { if ((offset.getSize() - 2 - r) % (1 << d) == 0) { int l = r - (1 << (d - 1)); int t = offset[r]; offset[r] += offset[l]; offset[l] = t; } }; for (int d = log2(offset.getSize() - 1); d > 0; d--) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize() - 1, up_sweep, d); }
Makefile +2 −0 Original line number Diff line number Diff line Loading @@ -37,6 +37,8 @@ HEADERS := CuckooHash/CuckooHashMap.h\ HashGraph/HashGraphV1/HashGraphV1View.hpp\ HashGraph/HashGraphV2/HashGraphV2.h\ HashGraph/HashGraphV2/HashGraphV2.hpp\ HashGraph/HashGraphV2/HashGraphV2View.h\ HashGraph/HashGraphV2/HashGraphV2View.hpp\ HashFunction.h\ HashFunction.hpp\ Pair.hpp\ Loading