From 41a88da85e811c9d69fbd5f1620faa6860947f36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Thu, 4 Apr 2019 15:26:18 +0200
Subject: [PATCH] [skip ci] Pass transformation string to autotuning

This can be used to distinguish between kernels with the same name but
different transformations.
---
 python/dune/codegen/sumfact/autotune.py        | 27 +++++++++++--
 .../dune/codegen/sumfact/transformations.py    | 38 +++++++++++++------
 2 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py
index 64d9d43a..393b21ff 100644
--- a/python/dune/codegen/sumfact/autotune.py
+++ b/python/dune/codegen/sumfact/autotune.py
@@ -304,8 +304,14 @@ def generate_standalone_code(sf, filename):
     set_option("opcounter", opcounting)
 
 
-def generate_standalone_kernel_code(kernel, signature, filename):
+def generate_standalone_kernel_code(kernel, signature, filename, transformations=None):
     with open(filename, 'w') as f:
+        if transformations:
+            f.write('// Transformations:\n')
+            for trafo in transformations:
+                f.write('// {}\n'.format(trafo))
+            f.write('\n')
+
         # Write headers
         headers = ['#include "config.h"',
                    '#include <iostream>',
@@ -410,7 +416,17 @@ def generate_standalone_kernel_code(kernel, signature, filename):
         f.write('\n'.join(main))
 
 
-def autotune_realization(sf=None, kernel=None, signature=None):
+def autotune_realization(sf=None, kernel=None, signature=None, transformations=None):
+    """Generate a microbenchmark, compile and run it, and return the measured time
+
+    Parameters
+    ----------
+    sf: SumfactKernel or VectorizedSumfactKernel
+    kernel: loopy.kernel.LoopKernel
+    signature: str
+    transformations: list of str
+        Will be used to distinguish between autotune targets
+    """
     if sf is None:
         assert kernel is not None
         assert signature is not None
@@ -427,6 +443,9 @@ def autotune_realization(sf=None, kernel=None, signature=None):
         basename = "autotune_sumfact_{}".format(kernel.name)
     else:
         basename = "autotune_sumfact_{}".format(sf.function_name)
+    if transformations:
+        for trafo in transformations:
+            basename = '{}_{}'.format(basename, trafo)
     basename = hashlib.sha256(basename.encode()).hexdigest()
 
     filename = os.path.join(dir, "{}.cc".format(basename))
@@ -434,6 +453,8 @@ def autotune_realization(sf=None, kernel=None, signature=None):
     lock = os.path.join(dir, "{}.lock".format(basename))
     executable = os.path.join(dir, basename)
 
+    print("palpo filename: {}".format(filename))
+
     # Generate and compile a benchmark program
     #
     # Note: cache restoring is only necessary when generating from SumfactKernel
@@ -441,7 +462,7 @@ def autotune_realization(sf=None, kernel=None, signature=None):
     with filelock.FileLock(lock):
         if not os.path.isfile(logname):
             if sf is None:
-                generate_standalone_kernel_code(kernel, signature, filename)
+                generate_standalone_kernel_code(kernel, signature, filename, transformations)
             elif get_option("autotune_google_benchmark"):
                 generate_standalone_code_google_benchmark(sf, filename)
             else:
diff --git a/python/dune/codegen/sumfact/transformations.py b/python/dune/codegen/sumfact/transformations.py
index b0a97923..8342a343 100644
--- a/python/dune/codegen/sumfact/transformations.py
+++ b/python/dune/codegen/sumfact/transformations.py
@@ -94,7 +94,7 @@ def move_zero_assignment_up(kernel, move_up_inames):
     return kernel
 
 
-def reorder_loops_in_tensor_contraction(kernel, iname_order):
+def _reorder_loops_in_tensor_contraction_direct(kernel, iname_order):
     """Reorder the loop nest of the tensor contractions
 
     iname_order is a string that specifies the loop order. We use the following convention:
@@ -172,12 +172,15 @@ def reorder_loops_in_tensor_contraction(kernel, iname_order):
     return kernel
 
 
-def reorder_loops_in_tensor_contraction_with_accumulation_variable(kernel, iname_order):
+def _reorder_loops_in_tensor_contraction_accum(kernel, iname_order):
     dim = world_dimension()
     assert dim == 3
 
+    if iname_order.endswith('j'):
+        return kernel
+
     # kernel = remove_all_reductions(kernel)
-    kernel = reorder_loops_in_tensor_contraction(kernel, iname_order)
+    kernel = _reorder_loops_in_tensor_contraction_direct(kernel, iname_order)
 
     cond = lp.match.Tagged('set_zero')
     for instr in lp.find_instructions(kernel, cond):
@@ -281,17 +284,30 @@ def reorder_loops_in_tensor_contraction_with_accumulation_variable(kernel, iname
     return kernel
 
 
+def reorder_loops_in_tensor_contraction(kernel, iname_order, accum_variable=True):
+    if accum_variable:
+        return _reorder_loops_in_tensor_contraction_accum(kernel, iname_order)
+    else:
+        return _reorder_loops_in_tensor_contraction_direct(kernel, iname_order)
+
+
 def tensor_contraction_loop_order_generator(kernel):
     dim = world_dimension()
     assert dim == 3
 
-    yield kernel
     indices = ['l', 'k', 'i', 'j']
     import itertools
     for loop_order in itertools.permutations(indices):
-        loop_order = ''.join(loop_order)
-        new_kernel = reorder_loops_in_tensor_contraction(kernel, loop_order)
-        yield new_kernel
+        # palpo TODO: Heavy culling for 'quick' tests during development
+        if loop_order[0] != 'l' or loop_order[1] != 'k':
+            continue
+
+        order = ''.join(loop_order)
+        new_kernel = reorder_loops_in_tensor_contraction(kernel, order, True)
+        yield new_kernel, ['reorder_loops_in_tensor_contraction_{}_True'.format(order), ]
+
+        new_kernel = reorder_loops_in_tensor_contraction(kernel, order, False)
+        yield new_kernel, ['reorder_loops_in_tensor_contraction_{}_False'.format(order), ]
 
 
 def simple_autotuner(kernel_generator, signature):
@@ -299,11 +315,11 @@ def simple_autotuner(kernel_generator, signature):
     from dune.codegen.options import set_option
     set_option("autotune_google_benchmark", True)
 
-    kernel = next(kernel_generator)
-    best_cost = autotune_realization(kernel=kernel, signature=signature)
+    kernel, transformations = next(kernel_generator)
+    best_cost = autotune_realization(kernel=kernel, signature=signature, transformations=transformations)
     best_kernel = kernel
-    for kernel in kernel_generator:
-        cost = autotune_realization(kernel=kernel, signature=signature)
+    for kernel, transformations in kernel_generator:
+        cost = autotune_realization(kernel=kernel, signature=signature, transformations=transformations)
         if cost < best_cost:
             best_cost = cost
             best_kernel = kernel
-- 
GitLab
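For illustration, a minimal sketch of the naming scheme this patch relies on in autotune_realization: the transformation strings are appended to the benchmark basename before it is hashed, so two kernels with the same name but different loop orders map to different benchmark source files. The helper name benchmark_source_path and its directory argument are hypothetical and not part of the patch.

    import hashlib
    import os

    def benchmark_source_path(kernel_name, transformations=None, directory="."):
        # Hypothetical helper mirroring the basename construction in
        # autotune_realization: transformation strings are folded into the
        # basename, which is then hashed with SHA-256 to form the file name.
        basename = "autotune_sumfact_{}".format(kernel_name)
        if transformations:
            for trafo in transformations:
                basename = '{}_{}'.format(basename, trafo)
        basename = hashlib.sha256(basename.encode()).hexdigest()
        return os.path.join(directory, "{}.cc".format(basename))

    # Same kernel name, different transformation strings -> distinct targets.
    a = benchmark_source_path("kernel", ["reorder_loops_in_tensor_contraction_lkij_True"])
    b = benchmark_source_path("kernel", ["reorder_loops_in_tensor_contraction_lkji_True"])
    assert a != b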