Commit 41a88da8 authored by René Heß

[skip ci] Pass transformation string to autotuning

This can be used to distinguish between kernels with the same name but
different transformations.
parent 57069224
@@ -304,8 +304,14 @@ def generate_standalone_code(sf, filename):
     set_option("opcounter", opcounting)


-def generate_standalone_kernel_code(kernel, signature, filename):
+def generate_standalone_kernel_code(kernel, signature, filename, transformations=None):
     with open(filename, 'w') as f:
+        if transformations:
+            f.write('// Transformations:\n')
+            for trafo in transformations:
+                f.write('// {}\n'.format(trafo))
+            f.write('\n')
+
         # Write headers
         headers = ['#include "config.h"',
                    '#include <iostream>',
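To make the effect of this hunk concrete, here is a minimal standalone sketch (the file name and the transformation string are made up for illustration): the generated benchmark source now records, as a C++ comment block, which loopy transformations produced it.

# Sketch of the header block written by the new code; names are placeholders.
transformations = ['reorder_loops_in_tensor_contraction_lkij_True']
with open('bench.cc', 'w') as f:
    if transformations:
        f.write('// Transformations:\n')
        for trafo in transformations:
            f.write('// {}\n'.format(trafo))
        f.write('\n')
# bench.cc now begins with:
#   // Transformations:
#   // reorder_loops_in_tensor_contraction_lkij_True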
@@ -410,7 +416,17 @@ def generate_standalone_kernel_code(kernel, signature, filename):
         f.write('\n'.join(main))


-def autotune_realization(sf=None, kernel=None, signature=None):
+def autotune_realization(sf=None, kernel=None, signature=None, transformations=None):
+    """Generate a microbenchmark, compile and run it, and return the measured time
+
+    Parameters
+    ----------
+    sf: SumfactKernel or VectorizedSumfactKernel
+    kernel: loopy.kernel.LoopKernel
+    signature: str
+    transformations: list of str
+        Will be used to distinguish between autotune targets
+    """
     if sf is None:
         assert kernel is not None
         assert signature is not None
@@ -427,6 +443,9 @@ def autotune_realization(sf=None, kernel=None, signature=None):
         basename = "autotune_sumfact_{}".format(kernel.name)
     else:
         basename = "autotune_sumfact_{}".format(sf.function_name)
+    if transformations:
+        for trafo in transformations:
+            basename = '{}_{}'.format(basename, trafo)

     basename = hashlib.sha256(basename.encode()).hexdigest()
     filename = os.path.join(dir, "{}.cc".format(basename))
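This hunk is the heart of the commit message: because the transformation names are appended to the basename before it is hashed, two kernels with the same name but different transformation lists land in distinct cache files. A minimal standalone sketch (the helper autotune_basename is hypothetical, introduced only for illustration):

import hashlib

def autotune_basename(name, transformations):
    # Append each transformation name, then hash, mirroring the hunk above.
    basename = "autotune_sumfact_{}".format(name)
    for trafo in transformations or []:
        basename = '{}_{}'.format(basename, trafo)
    return hashlib.sha256(basename.encode()).hexdigest()

# Same kernel name, different transformations -> different cache entries.
assert (autotune_basename("kernel", ["lkij_True"])
        != autotune_basename("kernel", ["lkij_False"]))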
@@ -434,6 +453,8 @@ def autotune_realization(sf=None, kernel=None, signature=None):
     lock = os.path.join(dir, "{}.lock".format(basename))
     executable = os.path.join(dir, basename)

+    print("palpo filename: {}".format(filename))
+
     # Generate and compile a benchmark program
     #
     # Note: cache restoring is only necessary when generating from SumfactKernel
@@ -441,7 +462,7 @@ def autotune_realization(sf=None, kernel=None, signature=None):
     with filelock.FileLock(lock):
         if not os.path.isfile(logname):
             if sf is None:
-                generate_standalone_kernel_code(kernel, signature, filename)
+                generate_standalone_kernel_code(kernel, signature, filename, transformations)
             elif get_option("autotune_google_benchmark"):
                 generate_standalone_code_google_benchmark(sf, filename)
             else:
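The FileLock guard exists because several code-generation processes may race to autotune the same kernel: only the first one generates, compiles and runs the benchmark; the rest wait on the lock and then find the cached log. A minimal sketch of that pattern, with placeholder paths rather than the ones dune-codegen computes (requires the filelock package):

import os
import filelock

dir = "/tmp/autotune_demo"
os.makedirs(dir, exist_ok=True)
logname = os.path.join(dir, "demo.log")
with filelock.FileLock(os.path.join(dir, "demo.lock")):
    if not os.path.isfile(logname):
        # Generate code, compile and run the benchmark, then write the log
        # so later processes skip straight to reading the cached result.
        with open(logname, 'w') as f:
            f.write("0.0\n")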
@@ -94,7 +94,7 @@ def move_zero_assignment_up(kernel, move_up_inames):
     return kernel


-def reorder_loops_in_tensor_contraction(kernel, iname_order):
+def _reorder_loops_in_tensor_contraction_direct(kernel, iname_order):
    """Reorder the loop nest of the tensor contractions

    iname_order is a string that specifies the loop order. We use the following convention:
@@ -172,12 +172,15 @@ def reorder_loops_in_tensor_contraction(kernel, iname_order):
     return kernel


-def reorder_loops_in_tensor_contraction_with_accumulation_variable(kernel, iname_order):
+def _reorder_loops_in_tensor_contraction_accum(kernel, iname_order):
     dim = world_dimension()
     assert dim == 3

     if iname_order.endswith('j'):
         return kernel

     # kernel = remove_all_reductions(kernel)
-    kernel = reorder_loops_in_tensor_contraction(kernel, iname_order)
+    kernel = _reorder_loops_in_tensor_contraction_direct(kernel, iname_order)

     cond = lp.match.Tagged('set_zero')
     for instr in lp.find_instructions(kernel, cond):
@@ -281,17 +284,30 @@ def reorder_loops_in_tensor_contraction_with_accumulation_variable(kernel, iname_order):
     return kernel


+def reorder_loops_in_tensor_contraction(kernel, iname_order, accum_variable=True):
+    if accum_variable:
+        return _reorder_loops_in_tensor_contraction_accum(kernel, iname_order)
+    else:
+        return _reorder_loops_in_tensor_contraction_direct(kernel, iname_order)
+
+
 def tensor_contraction_loop_order_generator(kernel):
     dim = world_dimension()
     assert dim == 3

-    yield kernel
+    yield kernel, []

     indices = ['l', 'k', 'i', 'j']
     import itertools
     for loop_order in itertools.permutations(indices):
-        loop_order = ''.join(loop_order)
-        new_kernel = reorder_loops_in_tensor_contraction(kernel, loop_order)
-        yield new_kernel
+        # palpo TODO: Heavy culling for 'quick' tests during development
+        if loop_order[0] != 'l' or loop_order[1] != 'k':
+            continue
+
+        order = ''.join(loop_order)
+
+        new_kernel = reorder_loops_in_tensor_contraction(kernel, order, True)
+        yield new_kernel, ['reorder_loops_in_tensor_contraction_{}_True'.format(order)]
+
+        new_kernel = reorder_loops_in_tensor_contraction(kernel, order, False)
+        yield new_kernel, ['reorder_loops_in_tensor_contraction_{}_False'.format(order)]
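Note that the generator now yields (kernel, transformations) pairs throughout, including the untransformed baseline, which is why the first yield carries an empty list. A short worked example of the development-time culling above: of the 24 permutations of ['l', 'k', 'i', 'j'], only those beginning with 'l', 'k' survive, namely 'lkij' and 'lkji', so the generator produces four transformed kernels (accumulation and direct variants of each order) plus the baseline.

import itertools

surviving = [''.join(p) for p in itertools.permutations(['l', 'k', 'i', 'j'])
             if p[0] == 'l' and p[1] == 'k']
assert surviving == ['lkij', 'lkji']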
@@ -299,11 +315,11 @@ def simple_autotuner(kernel_generator, signature):
     from dune.codegen.options import set_option
     set_option("autotune_google_benchmark", True)

-    kernel = next(kernel_generator)
-    best_cost = autotune_realization(kernel=kernel, signature=signature)
+    kernel, transformations = next(kernel_generator)
+    best_cost = autotune_realization(kernel=kernel, signature=signature, transformations=transformations)
     best_kernel = kernel
-    for kernel in kernel_generator:
-        cost = autotune_realization(kernel=kernel, signature=signature)
+    for kernel, transformations in kernel_generator:
+        cost = autotune_realization(kernel=kernel, signature=signature, transformations=transformations)
         if cost < best_cost:
             best_cost = cost
             best_kernel = kernel
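Putting the pieces together, a hypothetical driver (the wrapper autotune_loop_orders is not part of this commit, and the import path is assumed): the generator supplies (kernel, transformations) pairs, and simple_autotuner forwards the transformation names so each loop-order variant gets its own autotune cache entry despite sharing a kernel name.

def autotune_loop_orders(kernel, signature):
    # Sketch only: both functions come from the diffs above.
    generator = tensor_contraction_loop_order_generator(kernel)
    return simple_autotuner(generator, signature)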