diff --git a/python/dune/perftool/blockstructured/vectorization.py b/python/dune/perftool/blockstructured/vectorization.py index 6d37a47f57eb194c8ce13eb4611c4dd1872e3f68..7cd68f05f9445e31e01f9d9a5a1ffc47893e29d8 100644 --- a/python/dune/perftool/blockstructured/vectorization.py +++ b/python/dune/perftool/blockstructured/vectorization.py @@ -310,27 +310,31 @@ def find_accumulation_inames(knl): return inames -def prepare_macro_points(knl, vec_iname): +def add_iname_array(knl, vec_iname): insns_with_macro_points = lp.find_instructions(knl, Tagged(vec_iname)) if insns_with_macro_points: - from pudb import set_trace; set_trace() - tmp = vec_iname + '_arr' + array_name = vec_iname + '_arr' + vector_name = vec_iname + '_vec' new_temporaries = dict() - new_temporaries[tmp] = DuneTemporaryVariable(tmp, managed=True, shape=(get_form_option('number_of_blocks'),), - scope=lp.temp_var_scope.PRIVATE, dtype=np.float64, - base_storage=vec_iname + '_buff', - _base_storage_access_may_be_aliasing=True) + new_temporaries[array_name] = DuneTemporaryVariable(array_name, managed=True, shape=(get_form_option('number_of_blocks'),), + scope=lp.temp_var_scope.PRIVATE, dtype=np.float64, + base_storage=vec_iname + '_buff', + _base_storage_access_may_be_aliasing=True) + new_temporaries[vector_name] = DuneTemporaryVariable(vector_name, managed=True, shape=(get_form_option('number_of_blocks'),), + scope=lp.temp_var_scope.PRIVATE, dtype=np.float64, + base_storage=vec_iname + '_buff', + _base_storage_access_may_be_aliasing=True) new_insns = [] - new_insns.append(lp.Assignment(assignee=prim.Subscript(prim.Variable(tmp), (prim.Variable(vec_iname),)), + new_insns.append(lp.Assignment(assignee=prim.Subscript(prim.Variable(array_name), (prim.Variable(vec_iname),)), expression=prim.Variable(vec_iname), id='init_{}_buffer'.format(vec_iname), within_inames=frozenset({vec_iname}), within_inames_is_final=True)) replacemap = dict() - replacemap[vec_iname] = prim.Subscript(prim.Variable(tmp), (prim.Variable(vec_iname),)) + replacemap[vec_iname] = prim.Subscript(prim.Variable(vector_name), (prim.Variable(vec_iname),)) for insn in knl.instructions: if insn in insns_with_macro_points: @@ -354,11 +358,13 @@ def vectorize_micro_elements(knl): assert get_form_option('number_of_blocks') % vcl_size == 0 - knl = prepare_macro_points(knl, vec_iname) + knl = add_iname_array(knl, vec_iname) knl = lp.split_iname(knl, vec_iname, vcl_size, inner_tag='vec') array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] - knl = lp.split_array_axis(knl, array_alias, 0, vcl_size) + iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')] + knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size) + knl = lp.tag_array_axes(knl, iname_vector, ('c', 'vec')) knl = add_vcl_temporaries(knl) knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer')