Skip to content
Snippets Groups Projects
Commit be6a0f44 authored by Marcel Koch's avatar Marcel Koch
Browse files

add tail explicitly

parent e700618f
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@ import loopy as lp
import numpy as np
import pymbolic.primitives as prim
from loopy.match import Tagged, Id, Writes, Or
from loopy.match import Tagged, Id, Writes, Or, Iname
from dune.codegen.generation import get_global_context_value
from dune.codegen.loopy.target import dtype_floatingpoint
......@@ -44,8 +44,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
nptype = dtype_floatingpoint()
vcl_size = get_vcl_type_size(np.float64)
from loopy.match import Tagged
accum_insns = set(lp.find_instructions(knl, Tagged('accum')))
from loopy.match import Tagged, Iname, And
accum_insns = set(lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner)))))
new_insns = []
vng = knl.get_var_name_generator()
......@@ -375,16 +375,59 @@ def add_iname_array(knl, vec_iname):
return knl
def realize_tail(knl, iname_inner, iname_outer, vcl_size):
    """Add explicit '_tail' copies of every instruction nested in iname_inner.

    Each instruction whose ``within_inames`` contains ``iname_inner`` is
    duplicated.  In the copy

    * ``iname_inner`` and every iname that is not shared by all of these
      instructions are renamed by appending ``'_tail'``,
    * ``iname_outer`` is replaced in all expressions by the constant
      ``number_of_blocks // vcl_size``,
    * the instruction id and all dependencies on other duplicated
      instructions get the ``'_tail'`` suffix.

    The tail iname ``iname_inner + '_tail'`` runs over
    ``[0, number_of_blocks % vcl_size)``.

    :arg knl: the loopy kernel to transform
    :arg iname_inner: name of the inner iname whose instructions are duplicated
    :arg iname_outer: name of the outer iname that is fixed to a constant
        in the duplicated instructions
    :arg vcl_size: divisor used to compute the tail length and the fixed
        value of iname_outer
    :returns: the kernel with the additional domains and instructions, passed
        through ``lp.make_reduction_inames_unique``
    """
    import re
    from islpy import BasicSet

    tail_suffix = '_tail'
    number_of_blocks = get_form_option('number_of_blocks')
    tail_size = number_of_blocks % vcl_size

    insns_to_duplicate = list(lp.find_instructions(knl, Iname(iname_inner)))
    ids_to_duplicate = frozenset(insn.id for insn in insns_to_duplicate)

    # Inames shared by *all* instructions to duplicate stay untouched; every
    # other iname occurring in one of them needs its own tail copy.
    common_inames = knl.all_inames()
    for insn in insns_to_duplicate:
        common_inames = common_inames & insn.within_inames
    additional_inames_to_duplicate = frozenset().union(
        *(insn.within_inames - common_inames for insn in insns_to_duplicate))
    inames_to_duplicate = frozenset({iname_inner}) | additional_inames_to_duplicate

    # Rename the additional inames inside the textual form of their domain.
    # This is done in a single pass on whole identifiers only: a plain
    # str.replace would also rewrite longer names that merely contain an
    # iname as a substring, and could rewrite an already renamed iname again.
    # NOTE(review): the domain string may also mention inames that are not
    # renamed here, and it is parsed as a BasicSet - confirm that is intended.
    combined_domain_str = str(knl.get_inames_domain(additional_inames_to_duplicate))
    if additional_inames_to_duplicate:
        alternatives = '|'.join(
            re.escape(iname)
            for iname in sorted(additional_inames_to_duplicate, key=len, reverse=True))
        combined_domain_str = re.sub(
            r'\b(?:' + alternatives + r')\b',
            lambda match: match.group(0) + tail_suffix,
            combined_domain_str)

    new_doms = [BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(iname_inner + tail_suffix, tail_size)),
                BasicSet(combined_domain_str)]

    # Expression level substitution: duplicated inames map to their tail
    # variable, the outer iname is pinned to a constant.
    subst_map = {iname: prim.Variable(iname + tail_suffix) for iname in inames_to_duplicate}
    subst_map[iname_outer] = number_of_blocks // vcl_size

    def to_tail_expr(expr):
        return substitute(expr, subst_map)

    new_insns = []
    for insn in knl.instructions:
        if iname_inner not in insn.within_inames:
            continue
        new_insn = insn.with_transformed_expressions(to_tail_expr)
        new_depends_on = frozenset(
            insn_id + tail_suffix if insn_id in ids_to_duplicate else insn_id
            for insn_id in insn.depends_on)
        new_within_inames = frozenset(
            iname + tail_suffix if iname in inames_to_duplicate else iname
            for iname in insn.within_inames)
        new_insns.append(new_insn.copy(id=new_insn.id + tail_suffix,
                                       depends_on=new_depends_on,
                                       within_inames=new_within_inames))

    knl = knl.copy(domains=knl.domains + new_doms,
                   instructions=knl.instructions + new_insns)
    return lp.make_reduction_inames_unique(knl)
def vectorize_micro_elements(knl):
vec_iname = "subel_x"
if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell':
vcl_size = get_vcl_type_size(np.float64)
assert get_form_option('number_of_blocks') % vcl_size == 0
knl = add_iname_array(knl, vec_iname)
knl = lp.split_iname(knl, vec_iname, vcl_size, inner_tag='vec')
knl = lp.split_iname(knl, vec_iname, vcl_size, slabs=(0,1))
knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')]
knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment