Loading benchmark/_common/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) endif() if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -Xcompiler") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G") endif() enable_testing() Loading benchmark/owg/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -11,6 +11,7 @@ set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED ON) add_executable(owg owg.cu ${CXX_FLAGS}) add_executable(owg_test owg_test.cu ${CXX_FLAGS}) execute_process(COMMAND git apply ../GpuBTree.patch WORKING_DIRECTORY ./owg/GpuBTree) benchmark/owg/owg_test.cu 0 → 100644 +48 −0 Original line number Diff line number Diff line #include <stdio.h> #include <stdlib.h> #include <algorithm> #include <random> #include <vector> #include "../_common/benchmark.hpp" #include "./GpuBTree/src/GpuBTree.h" int main(int argc, char *argv[]) { using KeyType = uint32_t; using ValueType = uint32_t; std::vector<KeyType> input(Benchmark::Generators::shuffle<KeyType>(1 << 7)); GpuBTree::GpuBTreeMap<KeyType, ValueType, uint32_t> btree; uint32_t numKeys = input.size(); KeyType *gpuKeys; CHECK_ERROR(memoryUtil::deviceAlloc(gpuKeys, numKeys)); CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuKeys, numKeys)); ValueType *gpuValues; CHECK_ERROR(memoryUtil::deviceAlloc(gpuValues, numKeys)); CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuValues, numKeys)); btree.insertKeys(gpuKeys, gpuValues, numKeys, SourceT::DEVICE); cudaFree(gpuKeys); cudaFree(gpuValues); KeyType *gpuQueries; CHECK_ERROR(memoryUtil::deviceAlloc(gpuQueries, numKeys)); CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuQueries, numKeys)); ValueType *gpuResults; CHECK_ERROR(memoryUtil::deviceAlloc(gpuResults, numKeys)); btree.searchKeys(gpuQueries, gpuResults, numKeys, SourceT::DEVICE); cudaFree(gpuQueries); cudaFree(gpuResults); btree.free(); return 0; } No newline at end of file benchmark/tnl/CMakeLists.txt +4 −4 Original line number Diff line number Diff line Loading @@ -3,13 +3,13 @@ project(tnl) include(../_common/CMakeLists.txt) set(BUILD_CUDA TRUE) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored" ) "${CMAKE_CUDA_FLAGS} --diag_suppress=esa_on_defaulted_function_ignored") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda -DHAVE_CUDA" ) "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DHAVE_CUDA") add_executable(tnl_host tnl_host.cpp ${CXX_TESTS_FLAGS}) add_executable(tnl_cuda tnl_cuda.cu ${CXX_TESTS_FLAGS}) add_executable(tnl_test tnl_test.cu ${CXX_TESTS_FLAGS}) benchmark/tnl/tnl_test.cu 0 → 100644 +42 −0 Original line number Diff line number Diff line #include "../_common/benchmark.hpp" #include "../../implementation/src/BLinkTree/Default.hpp" #include "../../implementation/src/BTreeContainer/Default.hpp" #include <TNL/Assert.h> #include <TNL/Containers/Array.h> #include <TNL/Containers/ArrayView.h> #include <TNL/Devices/Cuda.h> #include <TNL/Exceptions/NotImplementedError.h> #include <TNL/Pointers/DevicePointer.h> using namespace TNL; using namespace TNL::Containers; int main(int argc, char **argv) { using KeyType = uint32_t; using ValueType = uint32_t; const size_t Order = 15; using Device = TNL::Devices::Cuda; using KeyArray = Containers::Array<ValueType, Device>; using ValueArray = Containers::Array<ValueType, Device>; using MaskArray = Containers::Array<bool, Device>; using Tree = BTreeContainer<KeyType, ValueType, Order, Device, BLinkTree>; std::vector<KeyType> input(Benchmark::Generators::shuffle<KeyType>(1 << 7)); Tree container(getContainerSize(input.size(), Order)); KeyArray keys(input); ValueArray values(input); container.init(); container.insert(keys, values); ValueArray results(input.size()); MaskArray mask(input.size()); container.find(keys, results, mask); return 0; } No newline at end of file Loading
benchmark/_common/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) endif() if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -Xcompiler") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G") endif() enable_testing() Loading
benchmark/owg/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -11,6 +11,7 @@ set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED ON) add_executable(owg owg.cu ${CXX_FLAGS}) add_executable(owg_test owg_test.cu ${CXX_FLAGS}) execute_process(COMMAND git apply ../GpuBTree.patch WORKING_DIRECTORY ./owg/GpuBTree)
benchmark/owg/owg_test.cu 0 → 100644 +48 −0 Original line number Diff line number Diff line #include <stdio.h> #include <stdlib.h> #include <algorithm> #include <random> #include <vector> #include "../_common/benchmark.hpp" #include "./GpuBTree/src/GpuBTree.h" int main(int argc, char *argv[]) { using KeyType = uint32_t; using ValueType = uint32_t; std::vector<KeyType> input(Benchmark::Generators::shuffle<KeyType>(1 << 7)); GpuBTree::GpuBTreeMap<KeyType, ValueType, uint32_t> btree; uint32_t numKeys = input.size(); KeyType *gpuKeys; CHECK_ERROR(memoryUtil::deviceAlloc(gpuKeys, numKeys)); CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuKeys, numKeys)); ValueType *gpuValues; CHECK_ERROR(memoryUtil::deviceAlloc(gpuValues, numKeys)); CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuValues, numKeys)); btree.insertKeys(gpuKeys, gpuValues, numKeys, SourceT::DEVICE); cudaFree(gpuKeys); cudaFree(gpuValues); KeyType *gpuQueries; CHECK_ERROR(memoryUtil::deviceAlloc(gpuQueries, numKeys)); CHECK_ERROR(memoryUtil::cpyToDevice(input.data(), gpuQueries, numKeys)); ValueType *gpuResults; CHECK_ERROR(memoryUtil::deviceAlloc(gpuResults, numKeys)); btree.searchKeys(gpuQueries, gpuResults, numKeys, SourceT::DEVICE); cudaFree(gpuQueries); cudaFree(gpuResults); btree.free(); return 0; } No newline at end of file
benchmark/tnl/CMakeLists.txt +4 −4 Original line number Diff line number Diff line Loading @@ -3,13 +3,13 @@ project(tnl) include(../_common/CMakeLists.txt) set(BUILD_CUDA TRUE) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored" ) "${CMAKE_CUDA_FLAGS} --diag_suppress=esa_on_defaulted_function_ignored") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda -DHAVE_CUDA" ) "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DHAVE_CUDA") add_executable(tnl_host tnl_host.cpp ${CXX_TESTS_FLAGS}) add_executable(tnl_cuda tnl_cuda.cu ${CXX_TESTS_FLAGS}) add_executable(tnl_test tnl_test.cu ${CXX_TESTS_FLAGS})
benchmark/tnl/tnl_test.cu 0 → 100644 +42 −0 Original line number Diff line number Diff line #include "../_common/benchmark.hpp" #include "../../implementation/src/BLinkTree/Default.hpp" #include "../../implementation/src/BTreeContainer/Default.hpp" #include <TNL/Assert.h> #include <TNL/Containers/Array.h> #include <TNL/Containers/ArrayView.h> #include <TNL/Devices/Cuda.h> #include <TNL/Exceptions/NotImplementedError.h> #include <TNL/Pointers/DevicePointer.h> using namespace TNL; using namespace TNL::Containers; int main(int argc, char **argv) { using KeyType = uint32_t; using ValueType = uint32_t; const size_t Order = 15; using Device = TNL::Devices::Cuda; using KeyArray = Containers::Array<ValueType, Device>; using ValueArray = Containers::Array<ValueType, Device>; using MaskArray = Containers::Array<bool, Device>; using Tree = BTreeContainer<KeyType, ValueType, Order, Device, BLinkTree>; std::vector<KeyType> input(Benchmark::Generators::shuffle<KeyType>(1 << 7)); Tree container(getContainerSize(input.size(), Order)); KeyArray keys(input); ValueArray values(input); container.init(); container.insert(keys, values); ValueArray results(input.size()); MaskArray mask(input.size()); container.find(keys, results, mask); return 0; } No newline at end of file