Commit 554db9e5 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Benchmarks: optimized building of the SpMV benchmark using explicit template instantiation

The build time improved from 4:48 to 1:43 on 16 cores, so the efficiency
is not great, but it's still better than nothing. Maybe the benchmarks
could be reorganized into more independent tasks that could be
explicitly instantiated more efficiently (even if there would be fewer
explicit instantiations eventually).
parent 8703690c
Loading
Loading
Loading
Loading

scripts/eti.py

0 → 100755
+74 −0
Original line number Diff line number Diff line
#! /usr/bin/env python3

import os.path
import pathlib
import re
import sys

def _fail(message):
    """Print *message* to stderr and terminate the script with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


# Exactly one command-line argument is accepted, and it must name an
# existing regular file.
if len(sys.argv) != 2:
    _fail(f"usage: {sys.argv[0]} FILE\n\nwhere FILE is a C++ source code or header file.")
if not os.path.isfile(sys.argv[1]):
    _fail(f"error: {sys.argv[1]} is not a valid file.")

# Path of the input file, its file name without the last extension, and the
# name of the output directory (created in the current working directory).
src = sys.argv[1]
basename, _extension = os.path.splitext(os.path.basename(src))
dirname = f"{basename}.templates"

# create the output directory on the first run
if not os.path.isdir(dirname):
    os.mkdir(dirname)

def get_source_code(namespaces, extern_template_instantiation):
    """Return the text of a source file holding one explicit instantiation.

    The generated text includes the input file (module-level ``src``),
    re-opens the given namespaces, and emits the declaration with its
    leading ``extern `` removed.

    Args:
        namespaces: Names of the enclosing namespaces, outermost first.
        extern_template_instantiation: The ``extern template ...`` line as
            read from the input file; surrounding whitespace is stripped.

    Returns:
        The generated source code as a string ending with a newline.
    """
    # only the first "extern " is dropped; the rest of the line is kept as is
    eti = extern_template_instantiation.strip().replace("extern ", "", 1)
    # use absolute path for the include when src is an absolute path
    # (e.g. when called by CMake, because relative include does not work with
    # its separate build dir structure)
    if src == os.path.abspath(src):
        include_path = src
    # use relative path for the include when src is relative
    else:
        include_path = os.path.relpath(src, dirname)

    lines = [f"#include \"{include_path}\""]
    lines.extend(f"namespace {ns} {{" for ns in namespaces)
    lines.append(eti)
    # close innermost-first so that each closing comment names the namespace
    # that its brace actually terminates
    lines.extend(f"}} // namespace {ns}" for ns in reversed(namespaces))
    return "\n".join(lines) + "\n"

def check_write(content, fname):
    """Write *content* to *fname* unless the file already holds it.

    The existing file and the new content are compared after stripping
    leading and trailing whitespace; when they match, the file is left
    untouched on disk (presumably so that its timestamp does not change and
    trigger a rebuild -- confirm against the build setup).

    Args:
        content: Text to store in the file.
        fname: Path of the file to create or update.
    """
    if os.path.isfile(fname):
        # read through a context manager so the handle is closed right away
        with open(fname, "r") as existing:
            up_to_date = existing.read().strip() == content.strip()
    else:
        up_to_date = False

    if not up_to_date:
        with open(fname, "w") as out:
            out.write(content)

# Heuristic patterns for namespace scopes: an opener such as "namespace X {"
# and a commented closer such as "} // namespace X", each filling its line.
ns_begin_pattern = re.compile(r"^\s*namespace\s+(\w+)\s*\{$")
ns_end_pattern = re.compile(r"^\s*\}\s*\/\/\s*namespace\s+(\w+)$")

i = 0  # running index used to number the generated files
namespaces = []  # currently open namespaces, outermost first
file_names = set()  # every file generated (or kept) in this run
# the context manager closes the input file once it has been scanned
with open(src) as src_file:
    for line in src_file:
        # heuristics for namespaces
        ns_begin = ns_begin_pattern.search(line)
        if ns_begin:
            namespaces.append(ns_begin.group(1))
        # an unmatched closer (its opener was not recognized by the
        # heuristic) is ignored instead of raising IndexError
        if namespaces and ns_end_pattern.search(line):
            namespaces.pop()

        if line.strip().startswith("extern template"):
            source_code = get_source_code(namespaces, line)
            # the same content is written once per file extension
            for ext in ["cpp", "cu"]:
                # build the name with the platform separator so that it
                # compares equal to str(path) in the cleanup loop below
                fname = os.path.join(dirname, f"{basename}.t{i}.{ext}")
                check_write(source_code, fname)
                file_names.add(fname)
            i += 1

# remove extraneous files from the target directory
for path in pathlib.Path(dirname).iterdir():
    if str(path) not in file_names:
        path.unlink()
+10 −8
Original line number Diff line number Diff line
@@ -9,15 +9,17 @@
#include( cmake/BuildCSR5.cmake )

if( BUILD_CUDA )
   file( GLOB EXPLICIT_TEMPLATES spmv.templates/*.cu )
   cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} )
    message( STATUS ${CXX_BENCHMARKS_FLAGS} )
    CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${PETSC_LINKER_FLAGS})
   cuda_add_executable( tnl-benchmark-spmv tnl-benchmark-spmv.cu ${EXPLICIT_TEMPLATES} ReferenceFormats/LightSpMV-1.0/SpMV.cu ReferenceFormats/LightSpMV-1.0/SpMVCSR.cu
                        OPTIONS ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
   target_link_libraries( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ${PETSC_LINKER_FLAGS})
else()
    ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
   file( GLOB EXPLICIT_TEMPLATES spmv.templates/*.cpp )
   add_executable( tnl-benchmark-spmv tnl-benchmark-spmv.cpp ${EXPLICIT_TEMPLATES} )
   target_compile_options( tnl-benchmark-spmv  PRIVATE ${CXX_BENCHMARKS_FLAGS} ${PETSC_CXX_FLAGS} )
   target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} )
    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${PETSC_LINKER_FLAGS} )
   target_link_libraries( tnl-benchmark-spmv ${PETSC_LINKER_FLAGS} )
endif()

install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )
+2 −2
Original line number Diff line number Diff line
@@ -17,8 +17,8 @@
#pragma push
#pragma diag_suppress = 1444
#include "LightSpMV-1.0/SpMV.h"
#include "LightSpMV-1.0/SpMV.cu"
#include "LightSpMV-1.0/SpMVCSR.cu"
//#include "LightSpMV-1.0/SpMV.cu"
//#include "LightSpMV-1.0/SpMVCSR.cu"
#pragma pop
#endif
#include <TNL/Matrices/SparseMatrix.h>
+1 −0
Original line number Diff line number Diff line
../../../scripts/eti.py
 No newline at end of file
+137 −21

File changed.

Preview size limit exceeded, changes collapsed.

Loading