diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 600ca11e29fc2306816fef8f7b5be3b07373d623..03ce2cf678f57d10c6d76a81aaf89fa3fe79af98 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -41,10 +41,10 @@ def add_vcl_temporaries(knl, vcl_size): iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()})) -def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): +def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): nptype = dtype_floatingpoint() - accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner)))) + accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(inner_iname)))) accum_ids = [insn.id for insn in accum_insns] new_insns = [] @@ -89,15 +89,15 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): var_left = prim.Subscript(prim.Variable(identifier_left), tuple(prim.Variable(i) for i in sorted(inames_micro - frozenset({iname_ix}))) + - (prim.Variable(iname_inner),)) - var_right = prim.Subscript(prim.Variable(identifier_right), (prim.Variable(iname_inner),)) + (prim.Variable(inner_iname),)) + var_right = prim.Subscript(prim.Variable(identifier_right), (prim.Variable(inner_iname),)) # init a id_init_a = idg('{}_init_' + identifier_left) new_insns.append(lp.Assignment(assignee=substitute(var_left, replace_head_inames), expression=0, id=id_init_a, - within_inames=(insn.within_inames - frozenset({iname_outer}) - + within_inames=(insn.within_inames - frozenset({outer_iname}) - inames_micro) | inames_head, tags=frozenset({'head_vec{}'.format(vcl_size)}))) @@ -145,11 +145,11 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): # tail handling, uses tail alias id_accum_tail = idg('{}_accum_tail'.format(insn.id)) - subst_map = {iname_inner: vcl_size - 1, iname_outer: get_form_option("number_of_blocks") // vcl_size - 1, + subst_map = {inner_iname: vcl_size - 1, outer_iname: get_form_option("number_of_blocks") // vcl_size - 1, iname_ix: 1, insn.assignee_name: prim.Variable(insn.assignee_name + '_tail'), **replace_tail_inames} assignee_tail = substitute(insn.assignee, subst_map) - expr_tail = prim.Sum((substitute(var_left, {iname_inner: 0, **replace_tail_inames}), assignee_tail)) + expr_tail = prim.Sum((substitute(var_left, {inner_iname: 0, **replace_tail_inames}), assignee_tail)) write_to_tail_ids = tuple(i.id for i in lp.find_instructions(knl, Writes(get_pymbolic_basename(assignee_tail)))) @@ -159,7 +159,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): id=id_accum_tail, depends_on=(frozenset({id_accum, id_permute, id_set_left, id_init_a}) | frozenset(write_to_tail_ids)), - within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer}) - + within_inames=(insn.within_inames - frozenset({inner_iname, outer_iname}) - inames_micro) | inames_tail, tags=frozenset({'tail_vec{}'.format(vcl_size)}))) else: @@ -173,10 +173,10 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries)) -def add_vcl_access(knl, iname_inner, vcl_size, levels=0): +def add_vcl_access(knl, inner_iname, vcl_size, level=0): accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)), - Iname(iname_inner)))))) - read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner)))))) + Iname(inner_iname)))))) + read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(inner_iname)))))) vectorized_insns = accum_insns | read_insns alias_suffix = 'alias' @@ -221,7 +221,7 @@ def add_vcl_access(knl, iname_inner, vcl_size, levels=0): # compute index without vec iname strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags) flat_index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) - if not (isinstance(i, prim.Variable) and i.name == iname_inner))) + if not (isinstance(i, prim.Variable) and i.name == inner_iname))) # find write insns write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec))))) @@ -248,7 +248,7 @@ def add_vcl_access(knl, iname_inner, vcl_size, levels=0): # flat index without vec iname strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags) flat_index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) - if not (isinstance(i, prim.Variable) and i.name == iname_inner))) + if not (isinstance(i, prim.Variable) and i.name == inner_iname))) # find write insns write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec))))) @@ -266,7 +266,7 @@ def add_vcl_access(knl, iname_inner, vcl_size, levels=0): dim = world_dimension() dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)] # remove CInstructions since loopy extract expects to get only assignments - knl_with_subst_insns = knl.copy(instructions=[insn for insn in lp.find_instructions(knl, Iname(iname_inner)) + knl_with_subst_insns = knl.copy(instructions=[insn for insn in lp.find_instructions(knl, Iname(inner_iname)) if not isinstance(insn, lp.CInstruction)]) for alias in vector_alias: # Rename lhs which would match the substitution rule since loopy doesn't want substitutions as lhs @@ -283,7 +283,7 @@ def add_vcl_access(knl, iname_inner, vcl_size, levels=0): knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] - parameters = ','.join(['ex_o{}'.format(l) for l in range(levels + 1)]) + \ + parameters = ','.join(['ex_o{}'.format(l) for l in range(level + 1)]) + \ ',v_i,' + \ ','.join(['e' + d for d in dim_names[1:dim]]) + \ ',ix,' + \ @@ -296,7 +296,7 @@ def add_vcl_access(knl, iname_inner, vcl_size, levels=0): (prim.Variable('v_i'),)) knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst) - knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner)) + knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(inner_iname)) knl = knl.copy(instructions=knl_with_subst_insns.instructions + [insn for insn in knl.instructions if insn.id not in knl_with_subst_insns.id_to_insn]) @@ -414,15 +414,15 @@ def add_vcl_iname_array(knl, iname, vec_iname, vcl_size): return knl -def realize_tail(knl, iname_inner, iname_outer, vcl_size): +def realize_tail(knl, inner_iname, outer_iname, tail_iname, vcl_size): tail_size = get_form_option('number_of_blocks') % vcl_size - new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(iname_inner + '_tail', tail_size)) + new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(tail_iname, tail_size)) - insns_to_duplicate = lp.find_instructions(knl, Iname(iname_inner)) + insns_to_duplicate = lp.find_instructions(knl, Iname(inner_iname)) ids_to_duplicate = tuple((insn.id for insn in insns_to_duplicate)) - subst_map = dict([(iname_outer, get_form_option('number_of_blocks') // vcl_size), - (iname_inner, prim.Variable(iname_inner + '_tail'))]) + subst_map = dict([(outer_iname, get_form_option('number_of_blocks') // vcl_size), + (inner_iname, prim.Variable(tail_iname))]) temporaries_to_duplicate = dict() for insn in insns_to_duplicate: @@ -439,8 +439,8 @@ def realize_tail(knl, iname_inner, iname_outer, vcl_size): new_insn = insn.with_transformed_expressions(lambda e: substitute(e, subst_map)) new_depends_on = frozenset((insn_id + '_tail' if insn_id in ids_to_duplicate else insn_id for insn_id in insn.depends_on)) - new_within_inames = frozenset((iname + '_tail' if iname == iname_inner else iname - for iname in insn.within_inames)) - frozenset({iname_outer}) + new_within_inames = frozenset((iname + '_tail' if iname == inner_iname else iname + for iname in insn.within_inames)) - frozenset({outer_iname}) new_insns.append(new_insn.copy(id=insn.id + '_tail', depends_on=new_depends_on, within_inames=new_within_inames))