From f564f0cbe80c8d86c4c102b74bf05724562bc5cf Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Thu, 14 Feb 2019 10:55:11 +0100 Subject: [PATCH] tail works unvectorized --- .../codegen/blockstructured/vectorization.py | 74 ++++++++++++------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index e8c56e0e..9afe9baf 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -3,6 +3,7 @@ import numpy as np import pymbolic.primitives as prim from loopy.match import Tagged, Id, Writes, Or, Iname +from islpy import BasicSet from dune.codegen.generation import get_global_context_value from dune.codegen.loopy.target import dtype_floatingpoint @@ -377,46 +378,52 @@ def add_iname_array(knl, vec_iname): def realize_tail(knl, iname_inner, iname_outer, vcl_size): tail_size = get_form_option('number_of_blocks') % vcl_size + new_dom = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(iname_inner + '_tail', tail_size)) insns_to_duplicate = lp.find_instructions(knl, Iname(iname_inner)) ids_to_duplicate = tuple((insn.id for insn in insns_to_duplicate)) - common_inames = knl.all_inames() - for insn in insns_to_duplicate: - common_inames = common_inames & insn.within_inames + subst_map = dict([(iname_outer, get_form_option('number_of_blocks') // vcl_size), + (iname_inner, prim.Variable(iname_inner + '_tail'))]) - additional_inames_to_duplicate = frozenset() + temporaries_to_duplicate = dict() for insn in insns_to_duplicate: - additional_inames_to_duplicate = additional_inames_to_duplicate | (insn.within_inames - common_inames) + if isinstance(insn, lp.Assignment): + assignee = insn.assignee + name = get_pymbolic_basename(assignee) + if name in knl.temporary_variables: + new_name = name + '_tail' + temporaries_to_duplicate[new_name] = knl.temporary_variables[name].copy(name=new_name) + subst_map[name] = prim.Variable(new_name) - inames_to_duplicate = frozenset({iname_inner}) | additional_inames_to_duplicate - - combined_domain_str = str(knl.get_inames_domain(additional_inames_to_duplicate)) - for iname in additional_inames_to_duplicate: - combined_domain_str = combined_domain_str.replace(iname, iname + '_tail') + new_insns = [] + for insn in insns_to_duplicate: + new_insn = insn.with_transformed_expressions(lambda e: substitute(e, subst_map)) + new_depends_on = frozenset((insn_id + '_tail' if insn_id in ids_to_duplicate else insn_id + for insn_id in insn.depends_on)) + new_within_inames = frozenset((iname + '_tail' if iname == iname_inner else iname + for iname in insn.within_inames)) - frozenset({iname_outer}) + new_insns.append(new_insn.copy(id=insn.id + '_tail', depends_on=new_depends_on, + within_inames=new_within_inames)) - from islpy import BasicSet - new_doms = [BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(iname_inner + '_tail', tail_size)), - BasicSet(combined_domain_str)] + knl = knl.copy(domains=knl.domains + [new_dom], instructions=knl.instructions + new_insns, + temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate)) - subst_map = dict([(iname, prim.Variable(iname + '_tail')) for iname in inames_to_duplicate] + - [(iname_outer, get_form_option('number_of_blocks') // vcl_size)]) + common_inames = knl.all_inames() + for insn in insns_to_duplicate: + common_inames = common_inames & (insn.within_inames | insn.reduction_inames()) - new_insns = [] - for insn in knl.instructions: - if iname_inner in insn.within_inames: - new_insn = insn.with_transformed_expressions(lambda e: substitute(e, subst_map)) - new_depends_on = frozenset((insn_id + '_tail' if insn_id in ids_to_duplicate else insn_id - for insn_id in insn.depends_on)) - new_within_inames = frozenset((iname + '_tail' if iname in inames_to_duplicate else iname - for iname in insn.within_inames)) - new_insns.append(new_insn.copy(id=new_insn.id + '_tail', depends_on=new_depends_on, - within_inames=new_within_inames)) + additional_inames_to_duplicate = frozenset() + for insn in insns_to_duplicate: + additional_inames_to_duplicate = additional_inames_to_duplicate | \ + ((insn.within_inames | insn.reduction_inames()) - common_inames) - knl = knl.copy(domains=knl.domains + new_doms, instructions=knl.instructions + new_insns) + knl = lp.duplicate_inames(knl, tuple(additional_inames_to_duplicate), + Or(tuple((Id(insn.id) for insn in new_insns)))) return lp.make_reduction_inames_unique(knl) + def vectorize_micro_elements(knl): vec_iname = "subel_x" if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell': @@ -424,8 +431,19 @@ def vectorize_micro_elements(knl): knl = add_iname_array(knl, vec_iname) - knl = lp.split_iname(knl, vec_iname, vcl_size, slabs=(0,1)) - knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) + # manually add tail, since split_iname with slabs tries to vectorize the tail + if get_form_option('number_of_blocks') % vcl_size > 0: + vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(knl, (vec_iname,)) + knl = knl.copy(domains=domch.get_domains_with( + BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound)))) + + knl = lp.split_iname(knl, vec_iname, vcl_size) + knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) + else: + knl = lp.split_iname(knl, vec_iname, vcl_size) + knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] -- GitLab