Loading HashGraph/HashGraphV1/HashGraphV1View.hpp +48 −25 Original line number Diff line number Diff line Loading @@ -18,31 +18,42 @@ HashGraphV1View<Item, Key, Device>::HashGraphV1View(TableType& table, } template<typename Item, typename Key, typename Device> void HashGraphV1View<Item, Key, Device>::build(const typename Array<Item, Device>::ConstViewType items, void HashGraphV1View<Item, Key, Device>::build(const typename Array<Item, Device>::ConstViewType input, ArrayView<int, Device> hashes, ArrayView<int, Device> counter) { auto content = m_content; auto fill_content = [content, items] __cuda_callable__ (int i) mutable { content[i] = items[i]; auto fill_content = [content, input] __cuda_callable__ (int i) mutable { content[i] = input[i]; }; TNL::Algorithms::ParallelFor<Device>::exec(0, items.getSize(), fill_content); TNL::Algorithms::ParallelFor<Device>::exec(0, input.getSize(), fill_content); auto hash = m_hash; auto init_hashes = [hash, hashes, items] __cuda_callable__ (int i) mutable { hashes[i] = hash(items[i].key); auto init_hashes = [hash, hashes, input] __cuda_callable__ (int i) mutable { hashes[i] = hash(input[i].key); }; TNL::Algorithms::ParallelFor<Device>::exec(0, m_items.getSize(), init_hashes); counter.setValue(0); auto count_hashes = [hashes, counter] __cuda_callable__ (int i) mutable { #ifdef __CUDA_ARCH__ atomicAdd(&counter[hashes[i]], 1); #else counter[hashes[i]]++; #endif }; TNL::Algorithms::ParallelFor<Device>::exec(0, hashes.getSize(), count_hashes); fill_offset(counter); counter.setValue(0, 0, counter.getSize()); for (int i = 0; i < counter.getSize(); i++) { int pos = m_offset[hashes[i]] + counter[hashes[i]]++; m_items[pos] = i; } auto offset = m_offset; auto items = m_items; auto place = [offset, counter, hashes, items] __cuda_callable__ (int i) mutable { #ifdef __CUDA_ARCH__ int pos = offset[hashes[i]] + atomicAdd(&counter[hashes[i]], 1); #else int pos = offset[hashes[i]] + counter[hashes[i]]++; #endif items[pos] = i; }; 
TNL::Algorithms::ParallelFor<Device>::exec(0, counter.getSize(), place); } template<typename Item, typename Key, typename Device> Loading @@ -50,12 +61,16 @@ HashGraphV1View<Item, Key, Device>::~HashGraphV1View() {} template<typename Item, typename Key, typename Device> void HashGraphV1View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter) { auto offset = m_offset; auto _fill = [counter, offset] __cuda_callable__ (int j) mutable { int latestOffset = 0; for (int i = 0; i < counter.getSize(); i++) { m_offset[i] = latestOffset; offset[i] = latestOffset; latestOffset += counter[i]; } m_offset[counter.getSize()] = latestOffset; offset[counter.getSize()] = latestOffset; }; TNL::Algorithms::ParallelFor<Device>::exec(0, 1, _fill); } template<typename Item, typename Key, typename Device> Loading @@ -64,15 +79,23 @@ int HashGraphV1View<Item, Key, Device>::duplicates() const { } template<typename Item, typename Key, typename Device> bool HashGraphV1View<Item, Key, Device>::find(const Key& key, Item* item) const { bool HashGraphV1View<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const { int hash = m_hash(key); int end = m_offset[hash + 1]; for (int i = m_offset[hash]; i < end; i++) { if (m_content[m_items[i]].key == key) { if (item) *item = m_content[m_items[i]]; return true; } int end = m_offset.getElement(hash + 1); auto content = m_content; Array<int, Device> result(1); auto rView = result.getView(); rView.setValue(-1); auto items = m_items; auto _find = [content, items, item, rView, key] __cuda_callable__ (int i) mutable { if (content[items[i]].key == key) { rView[0] = items[i]; item[0] = content[items[i]]; } }; TNL::Algorithms::ParallelFor<Device>::exec(m_offset.getElement(hash), end, _find); if (rView.getElement(0) > -1) return true; else return false; } Loading
// NOTE(review): the content that stood here was an exact byte-level duplicate
// of the HashGraphV1View.hpp diff chunk above (the same GitHub diff page was
// pasted into the file twice).  Keeping both copies would redefine every
// template member of HashGraphV1View in one header — a redefinition/ODR
// error — so the duplicate has been removed; the single remaining copy above
// is authoritative.