Commit f7744e77 authored by kolusask's avatar kolusask
Browse files

Finish porting HashGraphV1

parent c6f8c5b1
Loading
Loading
Loading
Loading
+48 −25
Original line number Diff line number Diff line
@@ -18,31 +18,42 @@ HashGraphV1View<Item, Key, Device>::HashGraphV1View(TableType& table,
}

// Build the hash graph from `input`:
//   1. copy the items into m_content,
//   2. hash each item's key into `hashes`,
//   3. histogram the hashes into `counter` (one cell per bucket),
//   4. convert the histogram into bucket offsets (fill_offset),
//   5. scatter every item index into m_items at its bucket slot.
// Assumes hashes.getSize() == input.getSize() and that `counter` has one
// cell per bucket (so m_offset needs counter.getSize() + 1 cells) —
// NOTE(review): confirm these invariants against the call sites.
template<typename Item, typename Key, typename Device>
void HashGraphV1View<Item, Key, Device>::build(const typename Array<Item, Device>::ConstViewType input,
                                               ArrayView<int, Device> hashes,
                                               ArrayView<int, Device> counter) {
    // 1. Copy the raw items into the view's content storage.
    auto content = m_content;
    auto fill_content = [content, input] __cuda_callable__ (int i) mutable {
        content[i] = input[i];
    };
    TNL::Algorithms::ParallelFor<Device>::exec(0, input.getSize(), fill_content);

    // 2. Hash every item's key. The bound is input.getSize() (was
    // m_items.getSize()): both `hashes[i]` and `input[i]` are indexed by
    // the input item index, so the input size is the only safe range.
    auto hash = m_hash;
    auto init_hashes = [hash, hashes, input] __cuda_callable__ (int i) mutable {
        hashes[i] = hash(input[i].key);
    };
    TNL::Algorithms::ParallelFor<Device>::exec(0, input.getSize(), init_hashes);

    // 3. Histogram: count how many items land in each bucket.
    counter.setValue(0);
    auto count_hashes = [hashes, counter] __cuda_callable__ (int i) mutable {
        #ifdef __CUDA_ARCH__
        atomicAdd(&counter[hashes[i]], 1);   // buckets are updated concurrently on device
        #else
        counter[hashes[i]]++;
        #endif
    };
    TNL::Algorithms::ParallelFor<Device>::exec(0, hashes.getSize(), count_hashes);

    // 4. Exclusive prefix sum of the histogram -> per-bucket start offsets.
    fill_offset(counter);

    // 5. Reuse `counter` as the per-bucket insertion cursor and scatter
    // each item index into its bucket's slice of m_items.
    counter.setValue(0, 0, counter.getSize());
    auto offset = m_offset;
    auto items = m_items;
    auto place = [offset, counter, hashes, items] __cuda_callable__ (int i) mutable {
        #ifdef __CUDA_ARCH__
        int pos = offset[hashes[i]] + atomicAdd(&counter[hashes[i]], 1);
        #else
        int pos = offset[hashes[i]] + counter[hashes[i]]++;
        #endif
        items[pos] = i;
    };
    // Iterate over the items (hashes.getSize()), not the buckets
    // (counter.getSize()): `i` is an item index — it subscripts `hashes`
    // and is the value stored into m_items — so the bucket count is the
    // wrong bound whenever the table size differs from the item count.
    TNL::Algorithms::ParallelFor<Device>::exec(0, hashes.getSize(), place);
}

template<typename Item, typename Key, typename Device>
@@ -50,12 +61,16 @@ HashGraphV1View<Item, Key, Device>::~HashGraphV1View() {}

// Exclusive prefix sum of `counter` into m_offset:
//   m_offset[b] = number of items in buckets 0 .. b-1,
//   m_offset[counter.getSize()] = total item count (sentinel).
// Requires m_offset to hold counter.getSize() + 1 elements.
// The scan is launched as a single-iteration ParallelFor so it runs where
// the data lives (host or device) without a transfer; it is O(#buckets)
// serial work — swap in a device scan if profiling ever flags it.
template<typename Item, typename Key, typename Device>
void HashGraphV1View<Item, Key, Device>::fill_offset(const ArrayView<int, Device>& counter) {
    // Capture the view by value: referring to the member m_offset inside a
    // __cuda_callable__ lambda would implicitly capture `this`, which is a
    // host pointer and not valid in device code.
    auto offset = m_offset;
    auto _fill = [counter, offset] __cuda_callable__ (int j) mutable {
        int latestOffset = 0;
        for (int i = 0; i < counter.getSize(); i++) {
            offset[i] = latestOffset;
            latestOffset += counter[i];
        }
        offset[counter.getSize()] = latestOffset;   // sentinel: total count
    };
    // One-thread launch: the single iteration performs the whole scan.
    TNL::Algorithms::ParallelFor<Device>::exec(0, 1, _fill);
}

template<typename Item, typename Key, typename Device>
@@ -64,15 +79,23 @@ int HashGraphV1View<Item, Key, Device>::duplicates() const {
}

// Looks up `key` in the bucket it hashes to. Returns true iff a stored item
// has a matching key; on success the match is copied into item[0] when
// `item` is non-empty (mirroring the old `Item*` contract, where a null
// pointer meant "existence check only").
// If several stored items share the key, the concurrent lambda invocations
// all write the flag/item[0], so which duplicate wins is unspecified —
// NOTE(review): confirm duplicates are absent or interchangeable.
template<typename Item, typename Key, typename Device>
bool HashGraphV1View<Item, Key, Device>::find(const Key& key, ArrayView<Item, Device> item) const {
    int hash = m_hash(key);
    // Bucket `hash` occupies m_items[m_offset[hash] .. m_offset[hash + 1]).
    int begin = m_offset.getElement(hash);
    int end = m_offset.getElement(hash + 1);

    // Device-side "found" flag: index of the matching item, or -1.
    Array<int, Device> result(1);
    auto rView = result.getView();
    rView.setValue(-1);

    auto content = m_content;
    auto items = m_items;
    // Restore the old null-output guard: writing item[0] on an empty view
    // would be an out-of-bounds store.
    const bool wantItem = item.getSize() > 0;
    auto _find = [content, items, item, rView, key, wantItem] __cuda_callable__ (int i) mutable {
        if (content[items[i]].key == key) {
            rView[0] = items[i];
            if (wantItem)
                item[0] = content[items[i]];
        }
    };
    TNL::Algorithms::ParallelFor<Device>::exec(begin, end, _find);
    return rView.getElement(0) > -1;
}