Skip to content
Snippets Groups Projects
Commit 3d7cb6ac authored by René Heß
Browse files

Bugfix in autotuning for loopy kernels

parent 5baf3ae5
No related branches found
No related tags found
No related merge requests found
...@@ -349,7 +349,7 @@ def generate_standalone_kernel_code(kernel, signature, filename): ...@@ -349,7 +349,7 @@ def generate_standalone_kernel_code(kernel, signature, filename):
# Declare random generators # Declare random generators
real = type_floatingpoint() real = type_floatingpoint()
lines = [' std::uniform_real_distribution<{}> unif(0,1);'.format(real), lines = [' std::uniform_real_distribution<{}> unif(0,1);'.format(real),
' std::uniform_int_distribution<int> unif_int(0,128);', ' std::uniform_int_distribution<int> unif_int(0,1);',
' std::default_random_engine re;'] ' std::default_random_engine re;']
f.write('\n'.join(lines) + '\n') f.write('\n'.join(lines) + '\n')
...@@ -369,6 +369,8 @@ def generate_standalone_kernel_code(kernel, signature, filename): ...@@ -369,6 +369,8 @@ def generate_standalone_kernel_code(kernel, signature, filename):
else: else:
assert 'fastdg' in arg.name assert 'fastdg' in arg.name
size = reduce(mul, arg.shape) size = reduce(mul, arg.shape)
min_stride = min([tag.stride for tag in arg.dim_tags])
size *= min_stride
alignment = arg.dtype.itemsize alignment = arg.dtype.itemsize
f.write(' {} {}[{}] __attribute__ ((aligned ({})));\n'.format(real, f.write(' {} {}[{}] __attribute__ ((aligned ({})));\n'.format(real,
arg.name, arg.name,
......
...@@ -176,9 +176,9 @@ def sumfact_performance_transformations(kernel, signature): ...@@ -176,9 +176,9 @@ def sumfact_performance_transformations(kernel, signature):
# kernel = reorder_loops_in_tensor_contraction(kernel, 'ijlk') # kernel = reorder_loops_in_tensor_contraction(kernel, 'ijlk')
# from dune.codegen.sumfact.autotune import autotune_realization # from dune.codegen.sumfact.autotune import autotune_realization
# from dune.codegen.options import set_option
# set_option("autotune_google_benchmark", True)
# test = autotune_realization(kernel=kernel, signature=signature) # test = autotune_realization(kernel=kernel, signature=signature)
# from pudb import set_trace; set_trace()
pass pass
return kernel return kernel
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment