From 8097cac474e22090151af50b25f047926886c45c Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Thu, 14 Feb 2019 16:31:28 +0100 Subject: [PATCH] vectorize tail odd behavior for unstructured tensor test... --- .../codegen/blockstructured/vectorization.py | 59 +++++++++++++------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index b45d92f8..600ca11e 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -173,8 +173,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries)) -def add_vcl_access(knl, iname_inner, vcl_size): - from loopy.match import Reads, Tagged +def add_vcl_access(knl, iname_inner, vcl_size, levels=0): accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)), Iname(iname_inner)))))) read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner)))))) @@ -221,15 +220,15 @@ def add_vcl_access(knl, iname_inner, vcl_size): # compute index without vec iname strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags) - index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) - if i != 0 and i.name != iname_inner)) + flat_index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) + if not (isinstance(i, prim.Variable) and i.name == iname_inner))) # find write insns write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec))))) # add load instruction load_id = idg('insn_' + name_vec + '_load') - call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), index)),)) + call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),)) load_insns.append(lp.CallInstruction(assignees=(), expression=call_load, id=load_id, within_inames=insn.within_inames | insn.reduction_inames(), depends_on=insn.depends_on | write_ids,)) @@ -248,15 +247,15 @@ def add_vcl_access(knl, iname_inner, vcl_size): # flat index without vec iname strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags) - index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) - if i != 0 and i.name != iname_inner)) + flat_index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) + if not (isinstance(i, prim.Variable) and i.name == iname_inner))) # find write insns write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec))))) # add store instruction store_id = idg('insn_' + name_vec + '_store') - call_store = prim.Call(VCLStore(name_vec), (prim.Sum((prim.Variable(name_alias), index)),)) + call_store = prim.Call(VCLStore(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),)) store_insns.append(lp.CallInstruction(assignees=(), expression=call_store, id=store_id, within_inames=insn.within_inames, depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] | @@ -284,14 +283,17 @@ def add_vcl_access(knl, iname_inner, vcl_size): knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] - parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \ - ',ix,' + ','.join(['i' + d for d in dim_names[1:dim]]) + parameters = ','.join(['ex_o{}'.format(l) for l in range(levels + 1)]) + \ + ',v_i,' + \ + ','.join(['e' + d for d in dim_names[1:dim]]) + \ + ',ix,' + \ + ','.join(['i' + d for d in dim_names[1:dim]]) knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters), parameters=parameters) new_subst = knl_with_subst_insns.substitutions.copy() rule = new_subst[alias + '_subst'] rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)), - (prim.Variable('ex_i'),)) + (prim.Variable('v_i'),)) knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst) knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner)) @@ -462,10 +464,14 @@ def realize_tail(knl, iname_inner, iname_outer, vcl_size): def vectorize_micro_elements(knl): vec_iname = "subel_x" + orig_iname = vec_iname if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell': vcl_size = get_vcl_type_size(np.float64) has_tail = get_form_option('number_of_blocks') % vcl_size > 0 + vectorize_tail = True + + knl = add_iname_array(knl, vec_iname) # manually add tail, since split_iname with slabs tries to vectorize the tail if has_tail: @@ -477,19 +483,34 @@ def vectorize_micro_elements(knl): knl = lp.split_iname(knl, vec_iname, vcl_size) knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) + + tail_iname = vec_iname + '_inner' + '_tail' else: knl = lp.split_iname(knl, vec_iname, vcl_size) knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) - knl = add_iname_array(knl, vec_iname) - array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] - iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')] - knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size) - knl = lp.tag_array_axes(knl, iname_vector, ('c', 'vec')) + knl = lp.split_array_axis(knl, array_alias, 0, vcl_size) + + def _do_vectorization(knl, iname_inner, iname_outer, vcl_size, levels=0): + knl = add_vcl_iname_array(knl, orig_iname, iname_inner, vcl_size) + knl = add_vcl_temporaries(knl, vcl_size) + knl = add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size) + knl = add_vcl_access(knl, iname_inner, vcl_size, levels) + return knl + + knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) + if has_tail and vectorize_tail: + vcl_size = vcl_size // 2 + vec_iname = tail_iname + + knl = lp.split_iname(knl, vec_iname, vcl_size) + knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) + + array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] + knl = lp.split_array_axis(knl, array_alias, 1, vcl_size) + + knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size, levels=1) - knl = add_vcl_temporaries(knl, vcl_size) - knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) - knl = add_vcl_access(knl, vec_iname + '_inner', vcl_size) return knl -- GitLab