diff --git a/python/dune/codegen/sumfact/autotune.py b/python/dune/codegen/sumfact/autotune.py index a8dfd4ac2183e493012d61fa1811f988b498c040..64d9d43ab47a6651fff609153323072fe835af34 100644 --- a/python/dune/codegen/sumfact/autotune.py +++ b/python/dune/codegen/sumfact/autotune.py @@ -349,7 +349,7 @@ def generate_standalone_kernel_code(kernel, signature, filename): # Declare random generators real = type_floatingpoint() lines = [' std::uniform_real_distribution<{}> unif(0,1);'.format(real), - ' std::uniform_int_distribution<int> unif_int(0,128);', + ' std::uniform_int_distribution<int> unif_int(0,1);', ' std::default_random_engine re;'] f.write('\n'.join(lines) + '\n') @@ -369,6 +369,8 @@ def generate_standalone_kernel_code(kernel, signature, filename): else: assert 'fastdg' in arg.name size = reduce(mul, arg.shape) + min_stride = min([tag.stride for tag in arg.dim_tags]) + size *= min_stride alignment = arg.dtype.itemsize f.write(' {} {}[{}] __attribute__ ((aligned ({})));\n'.format(real, arg.name, diff --git a/python/dune/codegen/sumfact/transformations.py b/python/dune/codegen/sumfact/transformations.py index bbc6c4f7c4459608fb5bdd17602249cb2cf47a37..b7ab65cf80506fe107e117ee2bb06da2bee42eef 100644 --- a/python/dune/codegen/sumfact/transformations.py +++ b/python/dune/codegen/sumfact/transformations.py @@ -176,9 +176,9 @@ def sumfact_performance_transformations(kernel, signature): # kernel = reorder_loops_in_tensor_contraction(kernel, 'ijlk') # from dune.codegen.sumfact.autotune import autotune_realization + # from dune.codegen.options import set_option + # set_option("autotune_google_benchmark", True) # test = autotune_realization(kernel=kernel, signature=signature) - # from pudb import set_trace; set_trace() - pass return kernel