From 3d7cb6ac8c72de750e12461e91fc807f0c8adc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de> Date: Wed, 3 Apr 2019 13:08:56 +0200 Subject: [PATCH] Bugfix in autotuning for loopy kernels --- python/dune/codegen/sumfact/autotune.py | 4 +++- python/dune/codegen/sumfact/transformations.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py index a8dfd4ac..64d9d43a 100644 --- a/python/dune/codegen/sumfact/autotune.py +++ b/python/dune/codegen/sumfact/autotune.py @@ -349,7 +349,7 @@ def generate_standalone_kernel_code(kernel, signature, filename): # Declare random generators real = type_floatingpoint() lines = [' std::uniform_real_distribution<{}> unif(0,1);'.format(real), - ' std::uniform_int_distribution<int> unif_int(0,128);', + ' std::uniform_int_distribution<int> unif_int(0,1);', ' std::default_random_engine re;'] f.write('\n'.join(lines) + '\n') @@ -369,6 +369,8 @@ def generate_standalone_kernel_code(kernel, signature, filename): else: assert 'fastdg' in arg.name size = reduce(mul, arg.shape) + min_stride = min([tag.stride for tag in arg.dim_tags]) + size *= min_stride alignment = arg.dtype.itemsize f.write(' {} {}[{}] __attribute__ ((aligned ({})));\n'.format(real, arg.name, diff --git a/python/dune/codegen/sumfact/transformations.py b/python/dune/codegen/sumfact/transformations.py index bbc6c4f7..b7ab65cf 100644 --- a/python/dune/codegen/sumfact/transformations.py +++ b/python/dune/codegen/sumfact/transformations.py @@ -176,9 +176,9 @@ def sumfact_performance_transformations(kernel, signature): # kernel = reorder_loops_in_tensor_contraction(kernel, 'ijlk') # from dune.codegen.sumfact.autotune import autotune_realization + # from dune.codegen.options import set_option + # set_option("autotune_google_benchmark", True) # test = autotune_realization(kernel=kernel, signature=signature) - # from pudb import set_trace; set_trace() - pass return kernel -- GitLab