Commit 0a3b9cd5 authored by Tat Dat Duong
Browse files

chore: add debug for nsight compute

parent 042781f2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
endif()

# Debug builds only: append -g and -G to the CUDA compiler flags (the commit
# title says this is to enable debugging under Nsight Compute).
# NOTE(review): the two set() lines below look like the removed and the added
# side of a one-line change ("+1 -1") rendered without diff markers; presumably
# only one of them exists in the real file -- confirm against the repository.
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -Xcompiler")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G")
endif()

enable_testing()
+1 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Build the owg and owg_test executables from owg.cu and owg_test.cu.
# NOTE(review): ${CXX_FLAGS} is expanded into the source-file list of
# add_executable() here -- confirm that this is intended.
add_executable(owg owg.cu ${CXX_FLAGS})
add_executable(owg_test owg_test.cu ${CXX_FLAGS})

# Run `git apply ../GpuBTree.patch` with ./owg/GpuBTree as the working
# directory while CMake processes this file. No RESULT_VARIABLE is captured,
# so a failing apply is not detected at this point.
execute_process(COMMAND git apply ../GpuBTree.patch
                WORKING_DIRECTORY ./owg/GpuBTree)
+48 −0
Original line number Diff line number Diff line
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <random>
#include <vector>

#include "../_common/benchmark.hpp"
#include "./GpuBTree/src/GpuBTree.h"

// Minimal GpuBTree driver: uploads a small key set, inserts it into a
// GpuBTreeMap, searches for the same keys and releases every device buffer.
// Nothing is copied back to the host or verified; the program only exercises
// the insert and search paths (e.g. for a debugger or profiler run).
//
// argc/argv are accepted but not used.
// Returns 0 when every checked CUDA call succeeded; CHECK_ERROR handles the
// failure case.
int main(int argc, char *argv[]) {
  using KeyType = uint32_t;
  using ValueType = uint32_t;

  // Host-side input produced by the benchmark generator for 1 << 7 (128)
  // elements; presumably a shuffled key sequence -- confirm in benchmark.hpp.
  std::vector<KeyType> input(Benchmark::Generators::shuffle<KeyType>(1 << 7));

  GpuBTree::GpuBTreeMap<KeyType, ValueType, uint32_t> btree;

  uint32_t numKeys = input.size();

  // Upload the keys.
  KeyType *gpuKeys;
  CHECK_ERROR(memoryUtil::deviceAlloc(gpuKeys, numKeys));
  CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuKeys, numKeys));

  // Upload the values; the same host buffer is used, so value == key.
  ValueType *gpuValues;
  CHECK_ERROR(memoryUtil::deviceAlloc(gpuValues, numKeys));
  CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuValues, numKeys));

  btree.insertKeys(gpuKeys, gpuValues, numKeys, SourceT::DEVICE);

  // cudaFree may also report errors raised by earlier asynchronous work, so
  // its result is checked like every other CUDA call in this function.
  CHECK_ERROR(cudaFree(gpuKeys));
  CHECK_ERROR(cudaFree(gpuValues));

  // Query with exactly the keys that were inserted.
  KeyType *gpuQueries;
  CHECK_ERROR(memoryUtil::deviceAlloc(gpuQueries, numKeys));
  CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuQueries, numKeys));

  // Device-side output buffer for the search; it is never read back.
  ValueType *gpuResults;
  CHECK_ERROR(memoryUtil::deviceAlloc(gpuResults, numKeys));

  btree.searchKeys(gpuQueries, gpuResults, numKeys, SourceT::DEVICE);

  CHECK_ERROR(cudaFree(gpuQueries));
  CHECK_ERROR(cudaFree(gpuResults));
  btree.free();

  return 0;
}
 No newline at end of file
+4 −4
Original line number Diff line number Diff line
@@ -3,13 +3,13 @@ project(tnl)
include(../_common/CMakeLists.txt)

# CUDA configuration for the tnl targets: sets BUILD_CUDA and appends the
# -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored,
# --expt-relaxed-constexpr, --expt-extended-lambda and -DHAVE_CUDA options to
# CMAKE_CUDA_FLAGS.
# NOTE(review): the lines below interleave the removed and the added side of a
# "+4 -4" change without diff markers, so this span is not valid CMake as
# shown -- confirm the actual file contents against the repository.
set(BUILD_CUDA TRUE)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe")
set(CMAKE_CUDA_FLAGS
    "${CMAKE_CUDA_FLAGS} -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored"
)
    "${CMAKE_CUDA_FLAGS} --diag_suppress=esa_on_defaulted_function_ignored")
set(CMAKE_CUDA_FLAGS
    "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda -DHAVE_CUDA"
)
    "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DHAVE_CUDA")

# Build the tnl_host, tnl_cuda and tnl_test executables from tnl_host.cpp,
# tnl_cuda.cu and tnl_test.cu respectively.
# NOTE(review): ${CXX_TESTS_FLAGS} is expanded into the source-file list of
# add_executable() here -- confirm that this is intended.
add_executable(tnl_host tnl_host.cpp ${CXX_TESTS_FLAGS})
add_executable(tnl_cuda tnl_cuda.cu ${CXX_TESTS_FLAGS})
add_executable(tnl_test tnl_test.cu ${CXX_TESTS_FLAGS})
+42 −0
Original line number Diff line number Diff line
#include "../_common/benchmark.hpp"

#include "../../implementation/src/BLinkTree/Default.hpp"
#include "../../implementation/src/BTreeContainer/Default.hpp"

#include <TNL/Assert.h>
#include <TNL/Containers/Array.h>
#include <TNL/Containers/ArrayView.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Exceptions/NotImplementedError.h>
#include <TNL/Pointers/DevicePointer.h>

using namespace TNL;
using namespace TNL::Containers;

// Minimal driver for the CUDA BTreeContainer (BLinkTree variant): builds a
// container, inserts a small key/value set and looks the same keys up again.
// The lookup output is not inspected; the program only exercises the
// init / insert / find paths (e.g. for a debugger or profiler run).
//
// argc/argv are accepted but not used. Always returns 0.
int main(int argc, char **argv) {
  using KeyType = uint32_t;
  using ValueType = uint32_t;
  const size_t Order = 15;

  using Device = TNL::Devices::Cuda;
  // Key storage is parameterised on KeyType (it was ValueType before, which
  // only worked because both aliases happen to be uint32_t).
  using KeyArray = Containers::Array<KeyType, Device>;
  using ValueArray = Containers::Array<ValueType, Device>;
  using MaskArray = Containers::Array<bool, Device>;
  using Tree = BTreeContainer<KeyType, ValueType, Order, Device, BLinkTree>;

  // Host-side input produced by the benchmark generator for 1 << 7 (128)
  // elements; presumably a shuffled key sequence -- confirm in benchmark.hpp.
  std::vector<KeyType> input(Benchmark::Generators::shuffle<KeyType>(1 << 7));

  // Container capacity is derived from the element count and the tree order.
  Tree container(getContainerSize(input.size(), Order));

  // Keys and values are both built from the same input, so value == key.
  KeyArray keys(input);
  ValueArray values(input);

  container.init();
  container.insert(keys, values);

  // Output buffers for the lookup, one slot per queried key.
  // NOTE(review): presumably `results` receives the found values and `mask`
  // the per-key hit flags -- confirm against BTreeContainer::find.
  ValueArray results(input.size());
  MaskArray mask(input.size());

  container.find(keys, results, mask);
  return 0;
}
 No newline at end of file