Skip to content
Snippets Groups Projects
Commit f564f0cb authored by Marcel Koch
Browse files

tail works unvectorized

parent be6a0f44
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@ import numpy as np
import pymbolic.primitives as prim
from loopy.match import Tagged, Id, Writes, Or, Iname
from islpy import BasicSet
from dune.codegen.generation import get_global_context_value
from dune.codegen.loopy.target import dtype_floatingpoint
......@@ -377,46 +378,52 @@ def add_iname_array(knl, vec_iname):
# NOTE(review): this span is a rendered diff hunk -- leading indentation and the
# +/- markers are missing, so lines of the old and the new revision of
# realize_tail appear interleaved (several names below are built twice).
# The comments describe individual statements only; confirm against the real
# file which of them belong to the current revision.
def realize_tail(knl, iname_inner, iname_outer, vcl_size):
# Remainder of the 'number_of_blocks' form option after division by vcl_size.
tail_size = get_form_option('number_of_blocks') % vcl_size
# One-dimensional domain 0 <= i < tail_size for the iname '<iname_inner>_tail'.
new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(iname_inner + '_tail', tail_size))
# Instructions selected by the Iname(iname_inner) match, and their ids.
insns_to_duplicate = lp.find_instructions(knl, Iname(iname_inner))
ids_to_duplicate = tuple((insn.id for insn in insns_to_duplicate))
# Start from all kernel inames and intersect with the within_inames of every
# selected instruction (reduction inames are not considered in this variant).
common_inames = knl.all_inames()
for insn in insns_to_duplicate:
common_inames = common_inames & insn.within_inames
# Substitution map: iname_outer -> number_of_blocks // vcl_size,
# iname_inner -> the variable '<iname_inner>_tail'.
subst_map = dict([(iname_outer, get_form_option('number_of_blocks') // vcl_size),
(iname_inner, prim.Variable(iname_inner + '_tail'))])
additional_inames_to_duplicate = frozenset()
# Maps '<name>_tail' to a renamed copy of the corresponding kernel temporary.
temporaries_to_duplicate = dict()
for insn in insns_to_duplicate:
# Collect the inames of this instruction that are not in common_inames.
additional_inames_to_duplicate = additional_inames_to_duplicate | (insn.within_inames - common_inames)
# For assignments whose assignee base name is a kernel temporary: register a
# '<name>_tail' copy of that temporary and substitute the name accordingly.
if isinstance(insn, lp.Assignment):
assignee = insn.assignee
name = get_pymbolic_basename(assignee)
if name in knl.temporary_variables:
new_name = name + '_tail'
temporaries_to_duplicate[new_name] = knl.temporary_variables[name].copy(name=new_name)
subst_map[name] = prim.Variable(new_name)
inames_to_duplicate = frozenset({iname_inner}) | additional_inames_to_duplicate
# Textual form of the domain of the additional inames, with every such iname
# renamed to '<iname>_tail'.
# NOTE(review): str.replace is a plain substring replacement -- an iname that
# is a substring of another identifier in the string would be rewritten too.
combined_domain_str = str(knl.get_inames_domain(additional_inames_to_duplicate))
for iname in additional_inames_to_duplicate:
combined_domain_str = combined_domain_str.replace(iname, iname + '_tail')
# Build a '<id>_tail' copy of every selected instruction: expressions go through
# subst_map, dependencies on selected instructions get the '_tail' suffix,
# iname_inner is renamed to its '_tail' counterpart and iname_outer is dropped.
new_insns = []
for insn in insns_to_duplicate:
new_insn = insn.with_transformed_expressions(lambda e: substitute(e, subst_map))
new_depends_on = frozenset((insn_id + '_tail' if insn_id in ids_to_duplicate else insn_id
for insn_id in insn.depends_on))
new_within_inames = frozenset((iname + '_tail' if iname == iname_inner else iname
for iname in insn.within_inames)) - frozenset({iname_outer})
new_insns.append(new_insn.copy(id=insn.id + '_tail', depends_on=new_depends_on,
within_inames=new_within_inames))
# NOTE(review): BasicSet also appears among the module-level imports of this listing.
from islpy import BasicSet
# Two domains: the tail iname range and the renamed combined domain.
new_doms = [BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(iname_inner + '_tail', tail_size)),
BasicSet(combined_domain_str)]
# Kernel copy with new_dom, the new instructions and the duplicated temporaries appended.
knl = knl.copy(domains=knl.domains + [new_dom], instructions=knl.instructions + new_insns,
temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate))
# Variant of the substitution map that renames every iname in
# inames_to_duplicate to '<iname>_tail' (plus the iname_outer entry).
subst_map = dict([(iname, prim.Variable(iname + '_tail')) for iname in inames_to_duplicate] +
[(iname_outer, get_form_option('number_of_blocks') // vcl_size)])
# Same intersection as above, but reduction inames are included here.
common_inames = knl.all_inames()
for insn in insns_to_duplicate:
common_inames = common_inames & (insn.within_inames | insn.reduction_inames())
# Variant of the duplication loop: walks all kernel instructions that contain
# iname_inner, renames every iname in inames_to_duplicate and does not remove
# iname_outer from within_inames.
new_insns = []
for insn in knl.instructions:
if iname_inner in insn.within_inames:
new_insn = insn.with_transformed_expressions(lambda e: substitute(e, subst_map))
new_depends_on = frozenset((insn_id + '_tail' if insn_id in ids_to_duplicate else insn_id
for insn_id in insn.depends_on))
new_within_inames = frozenset((iname + '_tail' if iname in inames_to_duplicate else iname
for iname in insn.within_inames))
new_insns.append(new_insn.copy(id=new_insn.id + '_tail', depends_on=new_depends_on,
within_inames=new_within_inames))
# Inames (within or reduction) of the selected instructions that are not in common_inames.
additional_inames_to_duplicate = frozenset()
for insn in insns_to_duplicate:
additional_inames_to_duplicate = additional_inames_to_duplicate | \
((insn.within_inames | insn.reduction_inames()) - common_inames)
# Kernel copy with new_doms and the new instructions appended.
knl = knl.copy(domains=knl.domains + new_doms, instructions=knl.instructions + new_insns)
# Duplicate the additional inames, restricted to the instructions in new_insns.
knl = lp.duplicate_inames(knl, tuple(additional_inames_to_duplicate),
Or(tuple((Id(insn.id) for insn in new_insns))))
# The result of lp.make_reduction_inames_unique on the kernel is returned.
return lp.make_reduction_inames_unique(knl)
def vectorize_micro_elements(knl):
vec_iname = "subel_x"
if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell':
......@@ -424,8 +431,19 @@ def vectorize_micro_elements(knl):
knl = add_iname_array(knl, vec_iname)
knl = lp.split_iname(knl, vec_iname, vcl_size, slabs=(0,1))
knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
# manually add tail, since split_iname with slabs tries to vectorize the tail
if get_form_option('number_of_blocks') % vcl_size > 0:
vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size
from loopy.kernel.tools import DomainChanger
domch = DomainChanger(knl, (vec_iname,))
knl = knl.copy(domains=domch.get_domains_with(
BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
else:
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment