diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index e78905127f6912b89de4c70ae548de58ba7d6bb8..372e645c7d8d18c2c39da850be9034fd32aa7d51 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -30,6 +30,7 @@ def rotate_function_mangler(knl, func, arg_dtypes): # This is not 100% within the loopy philosophy, as we are # passing the vector registers as references and have them # changed. Loopy assumes this function to be read-only. + include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile") vcl = lp.types.NumpyType(get_vcl_type(np.float64, register_size=256)) return lp.CallMangleInfo("transpose_reg", (), (vcl, vcl, vcl, vcl)) @@ -72,8 +73,6 @@ def collect_vector_data_rotate(knl): new_insns = [] all_writers = [] - tags = frozenset().union(*tuple(i.tags for i in insns)) - rotating = "gradvec" in tags # # Inspect the given instructions for dependent quantities @@ -281,16 +280,19 @@ def collect_vector_data_rotate(knl): lhsname = get_pymbolic_basename(insn.assignee) knl = add_vector_view(knl, lhsname, pad_to=vec_size, flatview=True) lhsname = get_vector_view_name(lhsname) + rotating = "gradvec" in insn.tags if rotating: assert isinstance(insn.assignee, prim.Subscript) last_index = insn.assignee.index[-1] assert last_index in tuple(range(4)) + vec_index_size = vec_size else: last_index = 0 + vec_index_size = 1 new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname), - (vector_indices.get(vec_size) + last_index, prim.Variable(new_iname)), + (vector_indices.get(vec_index_size) + last_index, prim.Variable(new_iname)), ), substitute(insn.expression, replacemap_vec), depends_on=frozenset({"continue_stmt"}),