Skip to content
Snippets Groups Projects
Commit 8097cac4 authored by Marcel Koch
Browse files

vectorize tail

odd behavior for unstructured tensor test...
parent e27b8015
No related branches found
No related tags found
No related merge requests found
...@@ -173,8 +173,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): ...@@ -173,8 +173,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size):
temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries)) temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries))
def add_vcl_access(knl, iname_inner, vcl_size): def add_vcl_access(knl, iname_inner, vcl_size, levels=0):
from loopy.match import Reads, Tagged
accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)), accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)),
Iname(iname_inner)))))) Iname(iname_inner))))))
read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner)))))) read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner))))))
...@@ -221,15 +220,15 @@ def add_vcl_access(knl, iname_inner, vcl_size): ...@@ -221,15 +220,15 @@ def add_vcl_access(knl, iname_inner, vcl_size):
# compute index without vec iname # compute index without vec iname
strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags) strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags)
index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) flat_index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides)
if i != 0 and i.name != iname_inner)) if not (isinstance(i, prim.Variable) and i.name == iname_inner)))
# find write insns # find write insns
write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec))))) write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec)))))
# add load instruction # add load instruction
load_id = idg('insn_' + name_vec + '_load') load_id = idg('insn_' + name_vec + '_load')
call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), index)),)) call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),))
load_insns.append(lp.CallInstruction(assignees=(), expression=call_load, load_insns.append(lp.CallInstruction(assignees=(), expression=call_load,
id=load_id, within_inames=insn.within_inames | insn.reduction_inames(), id=load_id, within_inames=insn.within_inames | insn.reduction_inames(),
depends_on=insn.depends_on | write_ids,)) depends_on=insn.depends_on | write_ids,))
...@@ -248,15 +247,15 @@ def add_vcl_access(knl, iname_inner, vcl_size): ...@@ -248,15 +247,15 @@ def add_vcl_access(knl, iname_inner, vcl_size):
# flat index without vec iname # flat index without vec iname
strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags) strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags)
index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides) flat_index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides)
if i != 0 and i.name != iname_inner)) if not (isinstance(i, prim.Variable) and i.name == iname_inner)))
# find write insns # find write insns
write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec))))) write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec)))))
# add store instruction # add store instruction
store_id = idg('insn_' + name_vec + '_store') store_id = idg('insn_' + name_vec + '_store')
call_store = prim.Call(VCLStore(name_vec), (prim.Sum((prim.Variable(name_alias), index)),)) call_store = prim.Call(VCLStore(name_vec), (prim.Sum((prim.Variable(name_alias), flat_index)),))
store_insns.append(lp.CallInstruction(assignees=(), expression=call_store, store_insns.append(lp.CallInstruction(assignees=(), expression=call_store,
id=store_id, within_inames=insn.within_inames, id=store_id, within_inames=insn.within_inames,
depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] | depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] |
...@@ -284,14 +283,17 @@ def add_vcl_access(knl, iname_inner, vcl_size): ...@@ -284,14 +283,17 @@ def add_vcl_access(knl, iname_inner, vcl_size):
knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns)
# substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner]
parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \ parameters = ','.join(['ex_o{}'.format(l) for l in range(levels + 1)]) + \
',ix,' + ','.join(['i' + d for d in dim_names[1:dim]]) ',v_i,' + \
','.join(['e' + d for d in dim_names[1:dim]]) + \
',ix,' + \
','.join(['i' + d for d in dim_names[1:dim]])
knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters), knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters),
parameters=parameters) parameters=parameters)
new_subst = knl_with_subst_insns.substitutions.copy() new_subst = knl_with_subst_insns.substitutions.copy()
rule = new_subst[alias + '_subst'] rule = new_subst[alias + '_subst']
rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)), rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)),
(prim.Variable('ex_i'),)) (prim.Variable('v_i'),))
knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst) knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst)
knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner)) knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner))
...@@ -462,10 +464,14 @@ def realize_tail(knl, iname_inner, iname_outer, vcl_size): ...@@ -462,10 +464,14 @@ def realize_tail(knl, iname_inner, iname_outer, vcl_size):
def vectorize_micro_elements(knl): def vectorize_micro_elements(knl):
vec_iname = "subel_x" vec_iname = "subel_x"
orig_iname = vec_iname
if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell': if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell':
vcl_size = get_vcl_type_size(np.float64) vcl_size = get_vcl_type_size(np.float64)
has_tail = get_form_option('number_of_blocks') % vcl_size > 0 has_tail = get_form_option('number_of_blocks') % vcl_size > 0
vectorize_tail = True
knl = add_iname_array(knl, vec_iname)
# manually add tail, since split_iname with slabs tries to vectorize the tail # manually add tail, since split_iname with slabs tries to vectorize the tail
if has_tail: if has_tail:
...@@ -477,19 +483,34 @@ def vectorize_micro_elements(knl): ...@@ -477,19 +483,34 @@ def vectorize_micro_elements(knl):
knl = lp.split_iname(knl, vec_iname, vcl_size) knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
tail_iname = vec_iname + '_inner' + '_tail'
else: else:
knl = lp.split_iname(knl, vec_iname, vcl_size) knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
knl = add_iname_array(knl, vec_iname)
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')] knl = lp.split_array_axis(knl, array_alias, 0, vcl_size)
knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size)
knl = lp.tag_array_axes(knl, iname_vector, ('c', 'vec')) def _do_vectorization(knl, iname_inner, iname_outer, vcl_size, levels=0):
knl = add_vcl_iname_array(knl, orig_iname, iname_inner, vcl_size)
knl = add_vcl_temporaries(knl, vcl_size)
knl = add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size)
knl = add_vcl_access(knl, iname_inner, vcl_size, levels)
return knl
knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
if has_tail and vectorize_tail:
vcl_size = vcl_size // 2
vec_iname = tail_iname
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
knl = lp.split_array_axis(knl, array_alias, 1, vcl_size)
knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size, levels=1)
knl = add_vcl_temporaries(knl, vcl_size)
knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
knl = add_vcl_access(knl, vec_iname + '_inner', vcl_size)
return knl return knl
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment