Loading HashGraph/HashGraphV1/HashGraphV1.hpp +2 −17 Original line number Diff line number Diff line Loading @@ -13,26 +13,11 @@ template<typename Item, typename Key, typename Device> HashGraphV1<Item, Key, Device>::HashGraphV1(const Array<Item, Device>& items) : m_content(items.getSize()), m_items(items.getSize()), m_offset(items.getSize() + 1, 0), m_offset((1 << int(ceil(log2(items.getSize())))), 0), m_hash(31, 10538, items.getSize()), m_view(std::make_shared<ViewType>(*this, items.getConstView(), Array<int, Device>(items.getSize()).getView(), Array<int, Device>(items.getSize()).getView())) { // Array<int, Device> hashes(m_items.getSize()); // for (int i = 0; i < m_items.getSize(); i++) // hashes[i] = m_hash(items[i].key); // Array<int, Device> counter(hashes.getSize(), 0); // for (int i = 0; i < hashes.getSize(); i++) // counter[hashes[i]]++; // fill_offset(counter); // counter.setValue(0, 0, counter.getSize()); // for (int i = 0; i < counter.getSize(); i++) { // int pos = m_offset[hashes[i]] + counter[hashes[i]]++; // new ((void*) &m_items[pos]) Item(items[i]); // } // for (int i = 0; i < items.getSize(); i++) // items[i].~Item(); } Array<int, Device>(items.getSize()).getView())) {} template<typename Item, typename Key, typename Device> int HashGraphV1<Item, Key, Device>::duplicates() const { Loading HashGraph/HashGraphV1/HashGraphV1View.hpp +22 −7 Original line number Diff line number Diff line Loading @@ -62,15 +62,30 @@ HashGraphV1View<Item, Key, Device>::~HashGraphV1View() {} template<typename Item, typename Key, typename Device> void HashGraphV1View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter) { auto offset = m_offset; auto _fill = [counter, offset] __cuda_callable__ (int j) mutable { int latestOffset = 0; for (int i = 0; i < counter.getSize(); i++) { offset[i] = latestOffset; latestOffset += counter[i]; auto fill = [offset, counter] __cuda_callable__ (int i) mutable { offset[i] = i >= counter.getSize() ? 0 : counter[i]; }; TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), fill); auto reduce = [offset] __cuda_callable__ (int k, int d) mutable { if (k % (1 << (d + 1)) == 0) offset[k + (1 << (d + 1)) - 1] = offset[k + (1 << d) - 1] + offset[k + (1 << (d + 1)) - 1]; }; for (int d = 0; d < log2(offset.getSize() - 1); d++) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), reduce, d); offset.setElement(offset.getSize() - 1, 0); auto up_sweep = [offset] __cuda_callable__ (int r, int d) mutable { if ((offset.getSize() - 1 - r) % (1 << d) == 0) { int l = r - (1 << (d - 1)); int t = offset[r]; offset[r] += offset[l]; offset[l] = t; } offset[counter.getSize()] = latestOffset; }; TNL::Algorithms::ParallelFor<Device>::exec(0, 1, _fill); for (int d = log2(offset.getSize()); d > 0; d--) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), up_sweep, d); } template<typename Item, typename Key, typename Device> Loading Loading
HashGraph/HashGraphV1/HashGraphV1.hpp +2 −17 Original line number Diff line number Diff line Loading @@ -13,26 +13,11 @@ template<typename Item, typename Key, typename Device> HashGraphV1<Item, Key, Device>::HashGraphV1(const Array<Item, Device>& items) : m_content(items.getSize()), m_items(items.getSize()), m_offset(items.getSize() + 1, 0), m_offset((1 << int(ceil(log2(items.getSize())))), 0), m_hash(31, 10538, items.getSize()), m_view(std::make_shared<ViewType>(*this, items.getConstView(), Array<int, Device>(items.getSize()).getView(), Array<int, Device>(items.getSize()).getView())) { // Array<int, Device> hashes(m_items.getSize()); // for (int i = 0; i < m_items.getSize(); i++) // hashes[i] = m_hash(items[i].key); // Array<int, Device> counter(hashes.getSize(), 0); // for (int i = 0; i < hashes.getSize(); i++) // counter[hashes[i]]++; // fill_offset(counter); // counter.setValue(0, 0, counter.getSize()); // for (int i = 0; i < counter.getSize(); i++) { // int pos = m_offset[hashes[i]] + counter[hashes[i]]++; // new ((void*) &m_items[pos]) Item(items[i]); // } // for (int i = 0; i < items.getSize(); i++) // items[i].~Item(); } Array<int, Device>(items.getSize()).getView())) {} template<typename Item, typename Key, typename Device> int HashGraphV1<Item, Key, Device>::duplicates() const { Loading
HashGraph/HashGraphV1/HashGraphV1View.hpp +22 −7 Original line number Diff line number Diff line Loading @@ -62,15 +62,30 @@ HashGraphV1View<Item, Key, Device>::~HashGraphV1View() {} template<typename Item, typename Key, typename Device> void HashGraphV1View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter) { auto offset = m_offset; auto _fill = [counter, offset] __cuda_callable__ (int j) mutable { int latestOffset = 0; for (int i = 0; i < counter.getSize(); i++) { offset[i] = latestOffset; latestOffset += counter[i]; auto fill = [offset, counter] __cuda_callable__ (int i) mutable { offset[i] = i >= counter.getSize() ? 0 : counter[i]; }; TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), fill); auto reduce = [offset] __cuda_callable__ (int k, int d) mutable { if (k % (1 << (d + 1)) == 0) offset[k + (1 << (d + 1)) - 1] = offset[k + (1 << d) - 1] + offset[k + (1 << (d + 1)) - 1]; }; for (int d = 0; d < log2(offset.getSize() - 1); d++) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), reduce, d); offset.setElement(offset.getSize() - 1, 0); auto up_sweep = [offset] __cuda_callable__ (int r, int d) mutable { if ((offset.getSize() - 1 - r) % (1 << d) == 0) { int l = r - (1 << (d - 1)); int t = offset[r]; offset[r] += offset[l]; offset[l] = t; } offset[counter.getSize()] = latestOffset; }; TNL::Algorithms::ParallelFor<Device>::exec(0, 1, _fill); for (int d = log2(offset.getSize()); d > 0; d--) TNL::Algorithms::ParallelFor<Device>::exec(0, offset.getSize(), up_sweep, d); } template<typename Item, typename Key, typename Device> Loading