Skip to content
Snippets Groups Projects 9.91 KiB
Newer Older
""" Autotuning for sum factorization kernels """

Dominic Kempf's avatar
Dominic Kempf committed
from dune.codegen.generation import cache_restoring, delete_cache_items
from import DuneTarget
from dune.codegen.sumfact.realization import realize_sumfact_kernel_function
from dune.codegen.options import get_option, set_option
from dune.codegen.error import CodegenAutotuneError

import loopy as lp
from pytools import product

import os
import re
import subprocess

def get_cmake_cache_entry(entry):
    """Look up a variable in the project's CMakeCache.txt.

    The cache file is read from the directory given by the
    ``project_basedir`` option. Lines are expected to have the form
    ``NAME:TYPE=VALUE``.

    :arg entry: Name of the cache variable; it is matched literally.
    :returns: The value of the first matching line as a string (without the
        trailing newline), or ``None`` if no line matches.
    """
    # The type has to be exactly one of the listed cache entry types. Writing
    # the list in square brackets would make it a character class that accepts
    # any mix of these letters and '|', so a non-capturing group is used.
    pattern = re.compile(
        r"{}:(?:INTERNAL|FILEPATH|BOOL|STRING|PATH|UNINITIALIZED|STATIC)=(.*)".format(re.escape(entry))
    )
    cache_file = os.path.join(get_option("project_basedir"), "CMakeCache.txt")
    with open(cache_file, "r") as cache:
        for line in cache:
            match = pattern.match(line)
            if match:
                return match.group(1)
    return None

Dominic Kempf's avatar
Dominic Kempf committed
def get_dune_codegen_dir():
    """Return the dune-codegen directory as recorded by CMake.

    If the CMake project being built is dune-codegen itself, this is the
    ``project_basedir`` option. Otherwise the ``dune-codegen_DIR`` entry of
    the CMake cache is returned (``None`` if the cache has no such entry).
    """
    if get_cmake_cache_entry("CMAKE_PROJECT_NAME") == "dune-codegen":
        return get_option("project_basedir")
    # Fallback for every other project. This return must not sit inside the
    # branch above, where it would be unreachable and the function would
    # yield None for all downstream projects.
    return get_cmake_cache_entry("dune-codegen_DIR")
def compiler_invocation(name, filename):
    # Assemble a compiler command line as a list of strings. The list starts
    # with the C++ compiler taken from the CMake cache and is returned at the
    # end; the files flags.make and link.txt of the `_autotune_target` CMake
    # target are scanned for flags along the way.
    #
    # NOTE(review): this copy of the function is incomplete. The body of the
    # first `if match:`, the statements announced by the comments "Add the
    # source file" and "Set an output name", and part of the linker-flag
    # branch are missing, and non-code annotation lines are interleaved.
    # `name` and `filename` are unused in the visible code -- presumably the
    # output name and the source file; confirm against version control.
    #
    # Determine the CMake Generator in use
    gen = get_cmake_cache_entry("CMAKE_GENERATOR")
    # NOTE(review): this check is an assert and is therefore skipped when
    # Python runs with -O.
    assert(gen == "Unix Makefiles")

    # Find compiler path
    compiler = get_cmake_cache_entry("CMAKE_CXX_COMPILER")
    compile_flags = [compiler]

    # Parse compiler flags
Dominic Kempf's avatar
Dominic Kempf committed
    # Each line of flags.make is split at its first '=' into two groups.
    # NOTE(review): the file object opened here is never closed explicitly.
    for line in open(os.path.join(get_dune_codegen_dir(), "python", "CMakeFiles", "_autotune_target.dir", "flags.make"), "r"):
        match = re.match("([^=]*)=(.*)", line)
        # NOTE(review): the body of this `if` is missing in this copy.
        if match:

    # Add the source file

    # Parse linker flags
Dominic Kempf's avatar
Dominic Kempf committed
    # Only the part of the link line that follows "_autotune_target " is
    # considered; it is split on whitespace into individual flags.
    for line in open(os.path.join(get_dune_codegen_dir(), "python", "CMakeFiles", "_autotune_target.dir", "link.txt"), "r"):
        match = re.match(".*_autotune_target (.*)", line)
        if match:
            for flag in match.groups()[0].split():
                # NOTE(review): lines appear to be missing between this
                # condition and the append below. As written, the path join
                # would be applied to options and absolute paths, which looks
                # unintended -- confirm against version control.
                if flag.startswith("-") or os.path.isabs(flag):
Dominic Kempf's avatar
Dominic Kempf committed
                    compile_flags.append(os.path.join(get_dune_codegen_dir(), "python", flag))

    # Set an output name

    return compile_flags

def generate_standalone_code(sf, filename):
    # Write a standalone C++ program to `filename`. The emitted text declares
    # polynomial objects and two aligned buffers, fills the input buffer,
    # runs the realized sum factorization kernel of `sf` in a loop between
    # TSC timer calls and streams the elapsed value to a std::ofstream.
    #
    # NOTE(review): this copy of the function is incomplete: several
    # f.writelines([...]) calls are never closed, the CodeGenerationState(...)
    # call is truncated, one import has no module name, and non-code
    # annotation lines are interleaved. Restore the missing lines from
    # version control before relying on this function.

    with open(filename, "w") as f:
        f.writelines(["#include \"config.h\"\n",
Dominic Kempf's avatar
Dominic Kempf committed

        f.writelines(["int main(int argc, char** argv)\n",

        # Setup a polynomial object (normally done in the LocalOperator members)
Dominic Kempf's avatar
Dominic Kempf committed
        # Remember the current "opcounter" option and switch it off here; the
        # saved value is written back at the end of this function.
        opcounting = get_option("opcounter")
        set_option("opcounter", False)
Dominic Kempf's avatar
Dominic Kempf committed
        # NOTE(review): the module name of this import is missing.
        from import type_floatingpoint
        real = type_floatingpoint()
        f.write("  using RF = {};\n".format(real))
        f.write("  using DF = {};\n".format(real))

Dominic Kempf's avatar
Dominic Kempf committed
        from dune.codegen.sumfact.tabulation import name_polynomials
René Heß's avatar
René Heß committed
        # One polynomial object is emitted per distinct degree, where the
        # degree of a matrix is its basis_size minus one.
        degs = tuple(m.basis_size - 1 for m in sf.matrix_sequence_quadrature_permuted)
        for deg in set(degs):
            f.write("  Dune::QkStuff::EquidistantLagrangePolynomials<DF, RF, {}> {};\n".format(deg, name_polynomials(deg)))

        # Get kernels
Dominic Kempf's avatar
Dominic Kempf committed
        from dune.codegen.pdelab.localoperator import extract_kernel_from_cache
        knl = realize_sumfact_kernel_function(sf)
        # The operator's constructor kernel is taken from the cache, copied
        # with a DuneTarget that has declare_temporaries=False, and scheduled.
        constructor_knl = extract_kernel_from_cache("operator", "constructor_kernel", None, wrap_in_cgen=False, add_timings=False)
        constructor_knl = constructor_knl.copy(target=DuneTarget(declare_temporaries=False))
        constructor_knl = lp.get_one_scheduled_kernel(constructor_knl)

        # Allocate buffers
René Heß's avatar
René Heß committed
        # Entry count: the larger of the product of all quadrature sizes and
        # the product of all basis sizes, each multiplied by the vector width.
        size = max(product(m.quadrature_size for m in sf.matrix_sequence_quadrature_permuted) * sf.vector_width,
                   product(m.basis_size for m in sf.matrix_sequence_quadrature_permuted) * sf.vector_width)
Dominic Kempf's avatar
Dominic Kempf committed
        # Convert the entry count into a byte count (precision_bits / 8 bytes
        # per entry); the buffers are declared as char arrays.
        size = int(size * (get_option("precision_bits") / 8))
        f.writelines(["  char buffer0[{}] __attribute__ ((aligned (32)));\n".format(size),
                      "  char buffer1[{}] __attribute__ ((aligned (32)));\n".format(size),

        # Setup fastdg inputs
        # NOTE(review): as written, both writes below happen only for
        # arguments containing "jacobian"; an `else:` may be missing before
        # the `size = ...` line. That line also rebinds `size`, which is used
        # again further down -- confirm this is intended.
        for arg in sf.interface.signature_args:
            if "jacobian" in arg:
                f.write("{} = 0;\n".format(arg))
Dominic Kempf's avatar
Dominic Kempf committed
                size = sf.interface.fastdg_interface_object_size
                f.write("RF {}[{}] __attribute__ ((aligned (32)));\n".format(arg.split()[-1], size))
        # Write stuff into the input buffer
        # `input` aliases buffer0; `output` aliases buffer0 or buffer1
        # depending on the parity of sf.length. The loop bound converts the
        # byte count back into an entry count.
        f.writelines(["  {0} *input = ({0} *)buffer0;\n".format(real),
                      "  {0} *output = ({0} *)buffer{1};\n".format(real, sf.length % 2),
                      "  for(int i=0; i<{}; ++i)\n".format(size / (get_option("precision_bits") / 8)),
                      "    input[i] = ({})(i+1);\n".format(real),

        target = DuneTarget()
        from loopy.codegen import CodeGenerationState
        codegen_state = CodeGenerationState(kernel=constructor_knl,

        # For every temporary declaration of the constructor kernel, write
        # the first line that its generate() yields.
        for decl in target.get_device_ast_builder().get_temporary_decls(codegen_state, 0):
            f.write("  {}\n".format(next(iter(decl.generate()))))

        # Copy the constructor kernel's preamble lines, leaving out those
        # that mention "gfsu".
        for _, line in constructor_knl.preambles:
            if "gfsu" not in line:
                f.write("  {}\n".format(line))

        # Add setup code for theta matrices. We add some lines not necessary,
        # but it would be more work to remove them than keeping them.
        # The first and last line of the generated body are dropped, as are
        # lines mentioning "gfsu", "meshwidth" or "geometry".
        for line in lp.generate_body(constructor_knl).split("\n")[1:-1]:
            if "gfsu" not in line and "meshwidth" not in line and "geometry" not in line:
                f.write("  {}\n".format(line))

        # Introduces a variable that makes sure that the kernel cannot be optimized away
        f.writelines(["  {} accum;\n".format(real),
                      "  std::mt19937 rng;\n",
                      "  rng.seed(42);\n",
                      "  std::uniform_int_distribution<> dis(0, {});\n".format(size / (get_option("precision_bits") / 8)),

        # Start a TSC timer
Dominic Kempf's avatar
Dominic Kempf committed
        f.writelines(["  auto start = Dune::PDELab::TSC::start();\n",

        # Add the implementation of the kernel.
Dominic Kempf's avatar
Dominic Kempf committed
        # The kernel body is wrapped in a C++ loop with int(1e9 / sf.operations)
        # iterations -- presumably sf.operations is the operation count of a
        # single kernel run; confirm.
        f.write("  for(int i=0; i<{}; ++i)\n".format(int(1e9 / sf.operations)))
        f.write("  {\n")
        # The first line of the kernel member is skipped -- presumably the
        # function signature; confirm.
        for line in knl.member.lines[1:]:
            f.write("    {}\n".format(line))
        f.write("  }\n")

        # Stop the TSC timer and write the result to a file
        f.writelines(["  auto stop = Dune::PDELab::TSC::stop();\n",
                      "  std::ofstream file;\n",
                      "  file << Dune::PDELab::TSC::elapsed(start, stop) << std::endl;\n",
                      "  file.close();\n",
                      "  accum += output[dis(rng)];\n",
                      "  std::cout << accum;\n",
        # Restore the "opcounter" option saved at the beginning.
        set_option("opcounter", opcounting)

def autotune_realization(sf):
    # Benchmark the sum factorization kernel `sf` and return the first line
    # of the benchmark's log file, converted to float and divided by 1000000.
    #
    # Source, log, lock file and executable all live in the directory
    # <project_basedir>/autotune-benchmarks and are named after the SHA-256
    # hex digest of "autotune_sumfact_<sf.function_name>". If the log file
    # already exists, code generation, compilation and execution are skipped.
    #
    # Raises CodegenAutotuneError if the compilation step or the benchmark
    # run yields a non-zero return value.
    #
    # NOTE(review): this copy of the function is incomplete: the bodies of
    # `if not os.path.exists(dir):` and `if wrapper:` as well as the callee
    # of both `ret = ...` statements are missing, and non-code annotation
    # lines are interleaved. `hashlib` and `filelock` are used below, but no
    # import for them is visible in this copy of the file.
    #
    # Make sure that the benchmark directory exists
    dir = os.path.join(get_option("project_basedir"), "autotune-benchmarks")
    if not os.path.exists(dir):

    basename = "autotune_sumfact_{}".format(sf.function_name)
    basename = hashlib.sha256(basename.encode()).hexdigest()
    filename = os.path.join(dir, "{}.cc".format(basename))
    logname = os.path.join(dir, "{}.log".format(basename))
    lock = os.path.join(dir, "{}.lock".format(basename))
    executable = os.path.join(dir, basename)

    # Generate and compile a benchmark program
    with cache_restoring():
        # Everything up to reading the log happens while holding the
        # per-benchmark file lock.
        with filelock.FileLock(lock):
            if not os.path.isfile(logname):
                generate_standalone_code(sf, filename)
                # NOTE(review): devnull is never closed in the visible code.
                devnull = open(os.devnull, 'w')
                ret =, filename), stdout=devnull, stderr=subprocess.STDOUT)
                if ret != 0:
Dominic Kempf's avatar
Dominic Kempf committed
                    # NOTE(review): `name` is not defined in this function, so
                    # building this message would raise NameError instead of
                    # CodegenAutotuneError; `executable` is presumably what
                    # was intended -- confirm.
                    raise CodegenAutotuneError("Compilation of autotune executable failed. Invocation: {}".format(" ".join(compiler_invocation(name, filename))))

                # Check whether the user specified an execution wrapper
                call = []
                wrapper = get_cmake_cache_entry("DUNE_CODEGEN_BENCHMARK_WRAPPER")
                if wrapper:

                # Run the benchmark program
                ret =, stdout=devnull, stderr=subprocess.STDOUT)
                if ret != 0:
Dominic Kempf's avatar
Dominic Kempf committed
                    raise CodegenAutotuneError("Execution of autotune benchmark failed. Invocation: {}".format(" ".join(call)))
            # Extract the result from the log file
            # NOTE(review): the log file object is not closed explicitly.
            return float(next(iter(open(logname, "r")))) / 1000000