From fed5721ed2918d419c04c08b0f86855a4e789989 Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Fri, 15 Feb 2019 12:29:39 +0100 Subject: [PATCH] ensure ordering: vectorized code before tail --- .../codegen/blockstructured/vectorization.py | 93 ++++++++++++------- 1 file changed, 60 insertions(+), 33 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 681551ca..b68d4b10 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -41,7 +41,7 @@ def add_vcl_temporaries(knl, vcl_size): iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()})) -def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): +def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level): nptype = dtype_floatingpoint() accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(inner_iname)))) @@ -93,13 +93,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): var_right = prim.Subscript(prim.Variable(identifier_right), (prim.Variable(inner_iname),)) # init a - id_init_a = idg('{}_init_' + identifier_left) + id_init_a = idg('insn_init_' + identifier_left) new_insns.append(lp.Assignment(assignee=substitute(var_left, replace_head_inames), expression=0, id=id_init_a, within_inames=(insn.within_inames - frozenset({outer_iname}) - inames_micro) | inames_head, - tags=frozenset({'head_vec{}'.format(vcl_size)}))) + tags=frozenset({'head_vec{}'.format(vcl_size), + 'vectorized_{}'.format(level)}))) # setze werte für a und b expr_right = substitute(expr_without_r, {iname_ix: 1}) @@ -111,12 +112,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): expression=expr_right, id=id_set_right, depends_on=insn.depends_on, - within_inames=insn.within_inames - frozenset({iname_ix}))) + within_inames=insn.within_inames - frozenset({iname_ix}), + tags=frozenset({'vectorized_{}'.format(level)}))) new_insns.append(lp.Assignment(assignee=var_left, expression=expr_left, id=id_set_left, depends_on=insn.depends_on | frozenset({id_init_a}), - within_inames=insn.within_inames - frozenset({iname_ix}))) + within_inames=insn.within_inames - frozenset({iname_ix}), + tags=frozenset({'vectorized_{}'.format(level)}))) # r+=a[iy] id_accum = idg('{}_mod_accum'.format(insn.id)) @@ -130,7 +133,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): depends_on=insn.depends_on | frozenset({id_set_left, id_init_a, id_set_right}), within_inames=insn.within_inames - frozenset({iname_ix}), - tags=frozenset({'accum_vec{}'.format(vcl_size)}))) + tags=frozenset({'accum_vec{}'.format(vcl_size), + 'vectorized_{}'.format(level)}))) # a[iy] = permute id_permute = idg('{}_permute'.format(insn.id)) expr_permute = prim.Call(VCLPermute(nptype, vcl_size, (vcl_size - 1,) + (-1,) * (vcl_size - 1)), @@ -140,7 +144,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): id=id_permute, depends_on=insn.depends_on | frozenset({id_set_left, id_init_a, id_set_right, id_accum}), - within_inames=insn.within_inames - frozenset({iname_ix}) + within_inames=insn.within_inames - frozenset({iname_ix}), + tags=frozenset({'vectorized_{}'.format(level)}) )) # tail handling, uses tail alias @@ -161,7 +166,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): frozenset(write_to_tail_ids)), within_inames=(insn.within_inames - frozenset({inner_iname, outer_iname}) - inames_micro) | inames_tail, - tags=frozenset({'tail_vec{}'.format(vcl_size)}))) + tags=frozenset({'tail_vec{}'.format(vcl_size), + 'vectorized_{}'.format(level)}))) else: if insn.id.endswith('tail') and insn.id.replace('_tail', '') in accum_ids: accum_id = insn.id.replace('_tail', '') @@ -240,7 +246,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),)) load_insns.append(lp.CallInstruction(assignees=(), expression=call_load, id=load_id, within_inames=insn.within_inames | insn.reduction_inames(), - depends_on=insn.depends_on | write_ids,)) + depends_on=insn.depends_on | write_ids, + tags=frozenset({'vectorized_{}'.format(level)}))) read_dependencies.setdefault(id, set()) read_dependencies[id].add(load_id) @@ -268,7 +275,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): store_insns.append(lp.CallInstruction(assignees=(), expression=call_store, id=store_id, within_inames=insn.within_inames, depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] | - write_ids))) + write_ids), + tags=frozenset({'vectorized_{}'.format(level)}))) # replace alias with vcl vector, except for accumulation assignee vector_alias = [a for a in knl.arg_dict if a.endswith(alias_suffix)] @@ -291,14 +299,15 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): new_insns.append(insn) knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) - # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] + # substitution rule for alias[[ex_o]*l,ex_inner, ey, ix, iy] -> vec[ex_inner] parameters = ','.join(['ex_o{}'.format(l) for l in range(level + 1)]) + \ ',v_i,' + \ ','.join(['e' + d for d in dim_names[1:dim]]) + \ ',ix,' + \ ','.join(['i' + d for d in dim_names[1:dim]]) - knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters), - parameters=parameters) + knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, + alias + '_subst', '{}[{}]'.format(alias, parameters), + parameters=parameters) new_subst = knl_with_subst_insns.substitutions.copy() rule = new_subst[alias + '_subst'] rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)), @@ -330,10 +339,12 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): raise CodegenVectorizationError new_insns.append(insn.copy(assignee=assignee_vec, depends_on=(insn.depends_on | read_dependencies[insn.id] | - write_ids))) + write_ids), + tags=insn.tags | frozenset({'vectorized_{}'.format(level)}))) else: new_insns.append(insn.copy(depends_on=(insn.depends_on | read_dependencies[insn.id] | - write_ids))) + write_ids), + tags=insn.tags | frozenset({'vectorized_{}'.format(level)}))) return knl.copy(instructions=new_insns + load_insns + store_insns) @@ -387,29 +398,28 @@ def add_iname_array(knl, iname): return knl -def add_vcl_iname_array(knl, iname, vec_iname, vcl_size): +def add_vcl_iname_array(knl, iname, vec_iname, vcl_size, level): insns_with_macro_points = lp.find_instructions(knl, And((Tagged(iname), Iname(vec_iname)))) if insns_with_macro_points: iname_array = iname + '_arr' vector_name = iname + '_vec{}'.format(vcl_size) - new_temporaries = dict() - new_temporaries[vector_name] = DuneTemporaryVariable(vector_name, managed=True, - shape=(get_form_option('number_of_blocks'),), - scope=lp.temp_var_scope.PRIVATE, dtype=np.float64, - base_storage=iname_array + '_buff', - _base_storage_access_may_be_aliasing=True) + new_temporaries = {vector_name: DuneTemporaryVariable(vector_name, managed=True, + shape=(get_form_option('number_of_blocks'),), + scope=lp.temp_var_scope.PRIVATE, dtype=np.float64, + base_storage=iname_array + '_buff', + _base_storage_access_may_be_aliasing=True)} silenced_warning = ["read_no_write({})".format(vector_name)] - replacemap = dict() - replacemap[iname_array] = prim.Variable(vector_name) + replacemap = {iname_array: prim.Variable(vector_name)} new_insns = [] for insn in knl.instructions: if insn in insns_with_macro_points: transformed_insn = insn.with_transformed_expressions(lambda expr: substitute(expr, replacemap)) - new_insns.append(transformed_insn.copy(depends_on='init_{}_buffer'.format(iname_array))) + new_insns.append(transformed_insn.copy(depends_on='init_{}_buffer'.format(iname_array), + tags=insn.tags | frozenset({'vectorized_{}'.format(level)}))) else: new_insns.append(insn) @@ -423,7 +433,7 @@ def add_vcl_iname_array(knl, iname, vec_iname, vcl_size): return knl -def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_size): +def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_size, level): tail_size = get_form_option('number_of_blocks') % vcl_size new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(tail_iname, tail_size)) @@ -451,7 +461,8 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz new_within_inames = frozenset((iname + '_tail' if iname == inner_iname else iname for iname in insn.within_inames)) - frozenset({outer_iname}) new_insns.append(new_insn.copy(id=insn.id + '_tail', depends_on=new_depends_on, - within_inames=new_within_inames)) + within_inames=new_within_inames, + tags=insn.tags | frozenset({'tail_{}'.format(level)}))) knl = knl.copy(domains=knl.domains + [new_dom], instructions=knl.instructions + new_insns, temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate)) @@ -471,6 +482,21 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz return lp.make_reduction_inames_unique(knl) +def add_tail_dependencies(knl, level): + vectorized_insns = lp.find_instructions(knl, Tagged('vectorized_{}'.format(level))) + vectorized_ids = frozenset((insn.id for insn in vectorized_insns)) + tail_insns = lp.find_instructions(knl, Tagged('tail_{}'.format(level))) + + new_insns = [] + for insn in knl.instructions: + if insn in tail_insns: + new_insns.append(insn.copy(depends_on=insn.depends_on | vectorized_ids)) + else: + new_insns.append(insn) + + return knl.copy(instructions=new_insns) + + def vectorize_micro_elements(knl): vec_iname = "subel_x" orig_iname = vec_iname @@ -503,7 +529,7 @@ def vectorize_micro_elements(knl): knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname) tail_iname = vec_iname + '_inner' + '_tail' - knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size) + knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level) else: knl = lp.split_iname(knl, vec_iname, vcl_size) @@ -512,16 +538,17 @@ def vectorize_micro_elements(knl): array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] knl = lp.split_array_axis(knl, array_alias, level, vcl_size) - knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size) knl = add_vcl_temporaries(knl, vcl_size) - knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size) + knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level) + knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level) knl = add_vcl_access(knl, inner_iname, vcl_size, level) - if tail_size > 0 and vectorize_tail: - knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1) + if tail_size > 0: + knl = add_tail_dependencies(knl, level) + if vectorize_tail: + knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1) return knl knl = _do_vectorization(knl, orig_iname, get_form_option('number_of_blocks'), vcl_size) - return knl -- GitLab