Loading HashGraph/HashGraphV1/HashGraphV1View.hpp +48 −25 Original line number Diff line number Diff line Loading @@ -18,31 +18,42 @@ HashGraphV1View<Item, Key, Device>::HashGraphV1View(TableType& table, } template<typename Item, typename Key, typename Device> void HashGraphV1View<Item, Key, Device>::build(const typename Array<Item, Device>::ConstViewType items, void HashGraphV1View<Item, Key, Device>::build(const typename Array<Item, Device>::ConstViewType input, ArrayView<int, Device> hashes, ArrayView<int, Device> counter) { auto content = m_content; auto fill_content = [content, items] __cuda_callable__ (int i) mutable { content[i] = items[i]; auto fill_content = [content, input] __cuda_callable__ (int i) mutable { content[i] = input[i]; }; TNL::Algorithms::ParallelFor<Device>::exec(0, items.getSize(), fill_content); TNL::Algorithms::ParallelFor<Device>::exec(0, input.getSize(), fill_content); auto hash = m_hash; auto init_hashes = [hash, hashes, items] __cuda_callable__ (int i) mutable { hashes[i] = hash(items[i].key); auto init_hashes = [hash, hashes, input] __cuda_callable__ (int i) mutable { hashes[i] = hash(input[i].key); }; TNL::Algorithms::ParallelFor<Device>::exec(0, m_items.getSize(), init_hashes); counter.setValue(0); auto count_hashes = [hashes, counter] __cuda_callable__ (int i) mutable { #ifdef __CUDA_ARCH__ atomicAdd(&counter[hashes[i]], 1); #else counter[hashes[i]]++; #endif }; TNL::Algorithms::ParallelFor<Device>::exec(0, hashes.getSize(), count_hashes); fill_offset(counter); counter.setValue(0, 0, counter.getSize()); for (int i = 0; i < counter.getSize(); i++) { int pos = m_offset[hashes[i]] + counter[hashes[i]]++; m_items[pos] = i; } auto offset = m_offset; auto items = m_items; auto place = [offset, counter, hashes, items] __cuda_callable__ (int i) mutable { #ifdef __CUDA_ARCH__ int pos = offset[hashes[i]] + atomicAdd(&counter[hashes[i]], 1); #else int pos = offset[hashes[i]] + counter[hashes[i]]++; #endif items[pos] = i; }; 
TNL::Algorithms::ParallelFor<Device>::exec(0, counter.getSize(), place); } template<typename Item, typename Key, typename Device> Loading @@ -50,12 +61,16 @@ HashGraphV1View<Item, Key, Device>::~HashGraphV1View() {} template<typename Item, typename Key, typename Device> void HashGraphV1View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter) { auto offset = m_offset; auto _fill = [counter, offset] __cuda_callable__ (int j) mutable { int latestOffset = 0; for (int i = 0; i < counter.getSize(); i++) { m_offset[i] = latestOffset; offset[i] = latestOffset; latestOffset += counter[i]; } m_offset[counter.getSize()] = latestOffset; offset[counter.getSize()] = latestOffset; }; TNL::Algorithms::ParallelFor<Device>::exec(0, 1, _fill); } template<typename Item, typename Key, typename Device> Loading @@ -64,15 +79,23 @@ int HashGraphV1View<Item, Key, Device>::duplicates() const { } template<typename Item, typename Key, typename Device> bool HashGraphV1View<Item, Key, Device>::find(const Key& key, Item* item) const { bool HashGraphV1View<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const { int hash = m_hash(key); int end = m_offset[hash + 1]; for (int i = m_offset[hash]; i < end; i++) { if (m_content[m_items[i]].key == key) { if (item) *item = m_content[m_items[i]]; return true; } int end = m_offset.getElement(hash + 1); auto content = m_content; Array<int, Device> result(1); auto rView = result.getView(); rView.setValue(-1); auto items = m_items; auto _find = [content, items, item, rView, key] __cuda_callable__ (int i) mutable { if (content[items[i]].key == key) { rView[0] = items[i]; item[0] = content[items[i]]; } }; TNL::Algorithms::ParallelFor<Device>::exec(m_offset.getElement(hash), end, _find); if (rView.getElement(0) > -1) return true; else return false; } Loading
// NOTE(review): the content that stood here was an exact byte-level duplicate
// of the HashGraphV1View.hpp diff chunk above (the same GitHub diff page was
// pasted into the file twice).  Keeping both copies would redefine every
// template member of HashGraphV1View in one header — a redefinition/ODR
// error — so the duplicate has been removed; the single remaining copy above
// is authoritative.