Skip to content
Snippets Groups Projects
Commit fed5721e authored by Marcel Koch's avatar Marcel Koch
Browse files

ensure ordering: vectorized code before tail

parent 70b9b0bb
No related branches found
No related tags found
No related merge requests found
...@@ -41,7 +41,7 @@ def add_vcl_temporaries(knl, vcl_size): ...@@ -41,7 +41,7 @@ def add_vcl_temporaries(knl, vcl_size):
iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()})) iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()}))
def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level):
nptype = dtype_floatingpoint() nptype = dtype_floatingpoint()
accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(inner_iname)))) accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(inner_iname))))
...@@ -93,13 +93,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): ...@@ -93,13 +93,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
var_right = prim.Subscript(prim.Variable(identifier_right), (prim.Variable(inner_iname),)) var_right = prim.Subscript(prim.Variable(identifier_right), (prim.Variable(inner_iname),))
# init a # init a
id_init_a = idg('{}_init_' + identifier_left) id_init_a = idg('insn_init_' + identifier_left)
new_insns.append(lp.Assignment(assignee=substitute(var_left, replace_head_inames), new_insns.append(lp.Assignment(assignee=substitute(var_left, replace_head_inames),
expression=0, expression=0,
id=id_init_a, id=id_init_a,
within_inames=(insn.within_inames - frozenset({outer_iname}) - within_inames=(insn.within_inames - frozenset({outer_iname}) -
inames_micro) | inames_head, inames_micro) | inames_head,
tags=frozenset({'head_vec{}'.format(vcl_size)}))) tags=frozenset({'head_vec{}'.format(vcl_size),
'vectorized_{}'.format(level)})))
# setze werte für a und b # setze werte für a und b
expr_right = substitute(expr_without_r, {iname_ix: 1}) expr_right = substitute(expr_without_r, {iname_ix: 1})
...@@ -111,12 +112,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): ...@@ -111,12 +112,14 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
expression=expr_right, expression=expr_right,
id=id_set_right, id=id_set_right,
depends_on=insn.depends_on, depends_on=insn.depends_on,
within_inames=insn.within_inames - frozenset({iname_ix}))) within_inames=insn.within_inames - frozenset({iname_ix}),
tags=frozenset({'vectorized_{}'.format(level)})))
new_insns.append(lp.Assignment(assignee=var_left, new_insns.append(lp.Assignment(assignee=var_left,
expression=expr_left, expression=expr_left,
id=id_set_left, id=id_set_left,
depends_on=insn.depends_on | frozenset({id_init_a}), depends_on=insn.depends_on | frozenset({id_init_a}),
within_inames=insn.within_inames - frozenset({iname_ix}))) within_inames=insn.within_inames - frozenset({iname_ix}),
tags=frozenset({'vectorized_{}'.format(level)})))
# r+=a[iy] # r+=a[iy]
id_accum = idg('{}_mod_accum'.format(insn.id)) id_accum = idg('{}_mod_accum'.format(insn.id))
...@@ -130,7 +133,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): ...@@ -130,7 +133,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
depends_on=insn.depends_on | frozenset({id_set_left, depends_on=insn.depends_on | frozenset({id_set_left,
id_init_a, id_set_right}), id_init_a, id_set_right}),
within_inames=insn.within_inames - frozenset({iname_ix}), within_inames=insn.within_inames - frozenset({iname_ix}),
tags=frozenset({'accum_vec{}'.format(vcl_size)}))) tags=frozenset({'accum_vec{}'.format(vcl_size),
'vectorized_{}'.format(level)})))
# a[iy] = permute # a[iy] = permute
id_permute = idg('{}_permute'.format(insn.id)) id_permute = idg('{}_permute'.format(insn.id))
expr_permute = prim.Call(VCLPermute(nptype, vcl_size, (vcl_size - 1,) + (-1,) * (vcl_size - 1)), expr_permute = prim.Call(VCLPermute(nptype, vcl_size, (vcl_size - 1,) + (-1,) * (vcl_size - 1)),
...@@ -140,7 +144,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): ...@@ -140,7 +144,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
id=id_permute, id=id_permute,
depends_on=insn.depends_on | frozenset({id_set_left, id_init_a, id_set_right, depends_on=insn.depends_on | frozenset({id_set_left, id_init_a, id_set_right,
id_accum}), id_accum}),
within_inames=insn.within_inames - frozenset({iname_ix}) within_inames=insn.within_inames - frozenset({iname_ix}),
tags=frozenset({'vectorized_{}'.format(level)})
)) ))
# tail handling, uses tail alias # tail handling, uses tail alias
...@@ -161,7 +166,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size): ...@@ -161,7 +166,8 @@ def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size):
frozenset(write_to_tail_ids)), frozenset(write_to_tail_ids)),
within_inames=(insn.within_inames - frozenset({inner_iname, outer_iname}) - within_inames=(insn.within_inames - frozenset({inner_iname, outer_iname}) -
inames_micro) | inames_tail, inames_micro) | inames_tail,
tags=frozenset({'tail_vec{}'.format(vcl_size)}))) tags=frozenset({'tail_vec{}'.format(vcl_size),
'vectorized_{}'.format(level)})))
else: else:
if insn.id.endswith('tail') and insn.id.replace('_tail', '') in accum_ids: if insn.id.endswith('tail') and insn.id.replace('_tail', '') in accum_ids:
accum_id = insn.id.replace('_tail', '') accum_id = insn.id.replace('_tail', '')
...@@ -240,7 +246,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): ...@@ -240,7 +246,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),)) call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),))
load_insns.append(lp.CallInstruction(assignees=(), expression=call_load, load_insns.append(lp.CallInstruction(assignees=(), expression=call_load,
id=load_id, within_inames=insn.within_inames | insn.reduction_inames(), id=load_id, within_inames=insn.within_inames | insn.reduction_inames(),
depends_on=insn.depends_on | write_ids,)) depends_on=insn.depends_on | write_ids,
tags=frozenset({'vectorized_{}'.format(level)})))
read_dependencies.setdefault(id, set()) read_dependencies.setdefault(id, set())
read_dependencies[id].add(load_id) read_dependencies[id].add(load_id)
...@@ -268,7 +275,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): ...@@ -268,7 +275,8 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
store_insns.append(lp.CallInstruction(assignees=(), expression=call_store, store_insns.append(lp.CallInstruction(assignees=(), expression=call_store,
id=store_id, within_inames=insn.within_inames, id=store_id, within_inames=insn.within_inames,
depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] | depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] |
write_ids))) write_ids),
tags=frozenset({'vectorized_{}'.format(level)})))
# replace alias with vcl vector, except for accumulation assignee # replace alias with vcl vector, except for accumulation assignee
vector_alias = [a for a in knl.arg_dict if a.endswith(alias_suffix)] vector_alias = [a for a in knl.arg_dict if a.endswith(alias_suffix)]
...@@ -291,14 +299,15 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): ...@@ -291,14 +299,15 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
new_insns.append(insn) new_insns.append(insn)
knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns)
# substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] # substitution rule for alias[[ex_o]*l,ex_inner, ey, ix, iy] -> vec[ex_inner]
parameters = ','.join(['ex_o{}'.format(l) for l in range(level + 1)]) + \ parameters = ','.join(['ex_o{}'.format(l) for l in range(level + 1)]) + \
',v_i,' + \ ',v_i,' + \
','.join(['e' + d for d in dim_names[1:dim]]) + \ ','.join(['e' + d for d in dim_names[1:dim]]) + \
',ix,' + \ ',ix,' + \
','.join(['i' + d for d in dim_names[1:dim]]) ','.join(['i' + d for d in dim_names[1:dim]])
knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters), knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns,
parameters=parameters) alias + '_subst', '{}[{}]'.format(alias, parameters),
parameters=parameters)
new_subst = knl_with_subst_insns.substitutions.copy() new_subst = knl_with_subst_insns.substitutions.copy()
rule = new_subst[alias + '_subst'] rule = new_subst[alias + '_subst']
rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)), rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)),
...@@ -330,10 +339,12 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): ...@@ -330,10 +339,12 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
raise CodegenVectorizationError raise CodegenVectorizationError
new_insns.append(insn.copy(assignee=assignee_vec, new_insns.append(insn.copy(assignee=assignee_vec,
depends_on=(insn.depends_on | read_dependencies[insn.id] | depends_on=(insn.depends_on | read_dependencies[insn.id] |
write_ids))) write_ids),
tags=insn.tags | frozenset({'vectorized_{}'.format(level)})))
else: else:
new_insns.append(insn.copy(depends_on=(insn.depends_on | read_dependencies[insn.id] | new_insns.append(insn.copy(depends_on=(insn.depends_on | read_dependencies[insn.id] |
write_ids))) write_ids),
tags=insn.tags | frozenset({'vectorized_{}'.format(level)})))
return knl.copy(instructions=new_insns + load_insns + store_insns) return knl.copy(instructions=new_insns + load_insns + store_insns)
...@@ -387,29 +398,28 @@ def add_iname_array(knl, iname): ...@@ -387,29 +398,28 @@ def add_iname_array(knl, iname):
return knl return knl
def add_vcl_iname_array(knl, iname, vec_iname, vcl_size): def add_vcl_iname_array(knl, iname, vec_iname, vcl_size, level):
insns_with_macro_points = lp.find_instructions(knl, And((Tagged(iname), Iname(vec_iname)))) insns_with_macro_points = lp.find_instructions(knl, And((Tagged(iname), Iname(vec_iname))))
if insns_with_macro_points: if insns_with_macro_points:
iname_array = iname + '_arr' iname_array = iname + '_arr'
vector_name = iname + '_vec{}'.format(vcl_size) vector_name = iname + '_vec{}'.format(vcl_size)
new_temporaries = dict() new_temporaries = {vector_name: DuneTemporaryVariable(vector_name, managed=True,
new_temporaries[vector_name] = DuneTemporaryVariable(vector_name, managed=True, shape=(get_form_option('number_of_blocks'),),
shape=(get_form_option('number_of_blocks'),), scope=lp.temp_var_scope.PRIVATE, dtype=np.float64,
scope=lp.temp_var_scope.PRIVATE, dtype=np.float64, base_storage=iname_array + '_buff',
base_storage=iname_array + '_buff', _base_storage_access_may_be_aliasing=True)}
_base_storage_access_may_be_aliasing=True)
silenced_warning = ["read_no_write({})".format(vector_name)] silenced_warning = ["read_no_write({})".format(vector_name)]
replacemap = dict() replacemap = {iname_array: prim.Variable(vector_name)}
replacemap[iname_array] = prim.Variable(vector_name)
new_insns = [] new_insns = []
for insn in knl.instructions: for insn in knl.instructions:
if insn in insns_with_macro_points: if insn in insns_with_macro_points:
transformed_insn = insn.with_transformed_expressions(lambda expr: substitute(expr, replacemap)) transformed_insn = insn.with_transformed_expressions(lambda expr: substitute(expr, replacemap))
new_insns.append(transformed_insn.copy(depends_on='init_{}_buffer'.format(iname_array))) new_insns.append(transformed_insn.copy(depends_on='init_{}_buffer'.format(iname_array),
tags=insn.tags | frozenset({'vectorized_{}'.format(level)})))
else: else:
new_insns.append(insn) new_insns.append(insn)
...@@ -423,7 +433,7 @@ def add_vcl_iname_array(knl, iname, vec_iname, vcl_size): ...@@ -423,7 +433,7 @@ def add_vcl_iname_array(knl, iname, vec_iname, vcl_size):
return knl return knl
def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_size): def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_size, level):
tail_size = get_form_option('number_of_blocks') % vcl_size tail_size = get_form_option('number_of_blocks') % vcl_size
new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(tail_iname, tail_size)) new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(tail_iname, tail_size))
...@@ -451,7 +461,8 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz ...@@ -451,7 +461,8 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
new_within_inames = frozenset((iname + '_tail' if iname == inner_iname else iname new_within_inames = frozenset((iname + '_tail' if iname == inner_iname else iname
for iname in insn.within_inames)) - frozenset({outer_iname}) for iname in insn.within_inames)) - frozenset({outer_iname})
new_insns.append(new_insn.copy(id=insn.id + '_tail', depends_on=new_depends_on, new_insns.append(new_insn.copy(id=insn.id + '_tail', depends_on=new_depends_on,
within_inames=new_within_inames)) within_inames=new_within_inames,
tags=insn.tags | frozenset({'tail_{}'.format(level)})))
knl = knl.copy(domains=knl.domains + [new_dom], instructions=knl.instructions + new_insns, knl = knl.copy(domains=knl.domains + [new_dom], instructions=knl.instructions + new_insns,
temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate)) temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate))
...@@ -471,6 +482,21 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz ...@@ -471,6 +482,21 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
return lp.make_reduction_inames_unique(knl) return lp.make_reduction_inames_unique(knl)
def add_tail_dependencies(knl, level):
    """Order the tail code of ``level`` after the vectorized code of ``level``.

    Every instruction tagged ``'tail_<level>'`` gets the ids of all
    instructions tagged ``'vectorized_<level>'`` added to its ``depends_on``.
    Instructions without the tail tag are passed through unchanged, and the
    instruction order of the kernel is preserved.

    :arg knl: kernel whose instructions are inspected
    :arg level: value formatted into the ``vectorized_``/``tail_`` tag names
    :returns: a copy of ``knl`` carrying the updated instruction list
    """
    vec_tag = 'vectorized_{}'.format(level)
    tail_tag = 'tail_{}'.format(level)

    # ids of everything that has to be scheduled before the tail
    ids_before_tail = frozenset(vec_insn.id
                                for vec_insn in lp.find_instructions(knl, Tagged(vec_tag)))
    tail_group = lp.find_instructions(knl, Tagged(tail_tag))

    def _with_ordering(candidate):
        # only tail instructions receive the additional dependencies
        if candidate not in tail_group:
            return candidate
        return candidate.copy(depends_on=candidate.depends_on | ids_before_tail)

    return knl.copy(instructions=[_with_ordering(candidate)
                                  for candidate in knl.instructions])
def vectorize_micro_elements(knl): def vectorize_micro_elements(knl):
vec_iname = "subel_x" vec_iname = "subel_x"
orig_iname = vec_iname orig_iname = vec_iname
...@@ -503,7 +529,7 @@ def vectorize_micro_elements(knl): ...@@ -503,7 +529,7 @@ def vectorize_micro_elements(knl):
knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname) knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
tail_iname = vec_iname + '_inner' + '_tail' tail_iname = vec_iname + '_inner' + '_tail'
knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size) knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
else: else:
knl = lp.split_iname(knl, vec_iname, vcl_size) knl = lp.split_iname(knl, vec_iname, vcl_size)
...@@ -512,16 +538,17 @@ def vectorize_micro_elements(knl): ...@@ -512,16 +538,17 @@ def vectorize_micro_elements(knl):
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
knl = lp.split_array_axis(knl, array_alias, level, vcl_size) knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size)
knl = add_vcl_temporaries(knl, vcl_size) knl = add_vcl_temporaries(knl, vcl_size)
knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size) knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
knl = add_vcl_access(knl, inner_iname, vcl_size, level) knl = add_vcl_access(knl, inner_iname, vcl_size, level)
if tail_size > 0 and vectorize_tail: if tail_size > 0:
knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1) knl = add_tail_dependencies(knl, level)
if vectorize_tail:
knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1)
return knl return knl
knl = _do_vectorization(knl, orig_iname, get_form_option('number_of_blocks'), vcl_size) knl = _do_vectorization(knl, orig_iname, get_form_option('number_of_blocks'), vcl_size)
return knl return knl
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment