From 3d7cb6ac8c72de750e12461e91fc807f0c8adc3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Wed, 3 Apr 2019 13:08:56 +0200
Subject: [PATCH] Bugfix in autotuning for loopy kernels

---
 python/dune/codegen/sumfact/autotune.py        | 4 +++-
 python/dune/codegen/sumfact/transformations.py | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py
index a8dfd4ac..64d9d43a 100644
--- a/python/dune/codegen/sumfact/autotune.py
+++ b/python/dune/codegen/sumfact/autotune.py
@@ -349,7 +349,7 @@ def generate_standalone_kernel_code(kernel, signature, filename):
         # Declare random generators
         real = type_floatingpoint()
         lines = ['  std::uniform_real_distribution<{}> unif(0,1);'.format(real),
-                 '  std::uniform_int_distribution<int> unif_int(0,128);',
+                 '  std::uniform_int_distribution<int> unif_int(0,1);',
                  '  std::default_random_engine re;']
         f.write('\n'.join(lines) + '\n')
 
@@ -369,6 +369,8 @@ def generate_standalone_kernel_code(kernel, signature, filename):
             else:
                 assert 'fastdg' in arg.name
                 size = reduce(mul, arg.shape)
+                min_stride = min([tag.stride for tag in arg.dim_tags])
+                size *= min_stride
                 alignment = arg.dtype.itemsize
                 f.write('  {} {}[{}] __attribute__ ((aligned ({})));\n'.format(real,
                                                                                arg.name,
diff --git a/python/dune/codegen/sumfact/transformations.py b/python/dune/codegen/sumfact/transformations.py
index bbc6c4f7..b7ab65cf 100644
--- a/python/dune/codegen/sumfact/transformations.py
+++ b/python/dune/codegen/sumfact/transformations.py
@@ -176,9 +176,9 @@ def sumfact_performance_transformations(kernel, signature):
         # kernel = reorder_loops_in_tensor_contraction(kernel, 'ijlk')
 
         # from dune.codegen.sumfact.autotune import autotune_realization
+        # from dune.codegen.options import set_option
+        # set_option("autotune_google_benchmark", True)
         # test = autotune_realization(kernel=kernel, signature=signature)
 
-        # from pudb import set_trace; set_trace()
-
         pass
     return kernel
-- 
GitLab