From 13ab3cc05e9021070bbff2f6b1bc63ca28c42d2f Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Fri, 15 Feb 2019 10:02:19 +0100 Subject: [PATCH] recursive vectorization --- .../codegen/blockstructured/vectorization.py | 77 ++++++++++--------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 03ce2cf6..c03ebc1f 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -186,11 +186,17 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): from loopy.symbolic import IdentityMapper class AliasIndexCollector(CombineMapper, IdentityMapper): + def __init__(self): + self.found_alias = False + def combine(self, values): return sum(values, tuple()) def map_constant(self, expr): - return tuple() + if self.found_alias: + return (expr,) + else: + return tuple() map_variable = map_constant map_function_symbol = map_constant @@ -198,7 +204,10 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): def map_subscript(self, expr): if expr.aggregate.name.endswith(alias_suffix): - return expr.aggregate, expr.index_tuple + self.found_alias = True + indices = self.combine((self.rec(index) for index in expr.index_tuple)) + self.found_alias = False + return expr.aggregate, indices else: return tuple() @@ -467,50 +476,48 @@ def vectorize_micro_elements(knl): orig_iname = vec_iname if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell': vcl_size = get_vcl_type_size(np.float64) + knl = add_iname_array(knl, vec_iname) - has_tail = get_form_option('number_of_blocks') % vcl_size > 0 - vectorize_tail = True + def _do_vectorization(knl, vec_iname, vcl_size, level=0): + inner_iname = vec_iname + '_inner' + outer_iname = vec_iname + '_outer' - knl = add_iname_array(knl, vec_iname) + has_tail = get_form_option('number_of_blocks') % vcl_size > 0 + vectorize_tail = (vcl_size // 2) > 1 and \ + (get_form_option('number_of_blocks') % vcl_size) % (vcl_size // 2) == 0 - # manually add tail, since split_iname with slabs tries to vectorize the tail - if has_tail: - vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(knl, (vec_iname,)) - knl = knl.copy(domains=domch.get_domains_with( - BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound)))) + # manually add tail, since split_iname with slabs tries to vectorize the tail + if has_tail: + # fake suitable loop bound + vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(knl, (vec_iname,)) + knl = knl.copy(domains=domch.get_domains_with( + BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound)))) - knl = lp.split_iname(knl, vec_iname, vcl_size) - knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) + knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname) - tail_iname = vec_iname + '_inner' + '_tail' - else: - knl = lp.split_iname(knl, vec_iname, vcl_size) + tail_iname = vec_iname + '_inner' + '_tail' + knl = realize_tail(knl, inner_iname, outer_iname, tail_iname, vcl_size) + else: + knl = lp.split_iname(knl, vec_iname, vcl_size) - knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) + knl = lp.tag_inames(knl, [(inner_iname, 'vec')]) - array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] - knl = lp.split_array_axis(knl, array_alias, 0, vcl_size) + array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] + knl = lp.split_array_axis(knl, array_alias, level, vcl_size) - def _do_vectorization(knl, iname_inner, iname_outer, vcl_size, levels=0): - knl = add_vcl_iname_array(knl, orig_iname, iname_inner, vcl_size) + knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size) knl = add_vcl_temporaries(knl, vcl_size) - knl = add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size) - knl = add_vcl_access(knl, iname_inner, vcl_size, levels) - return knl + knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size) + knl = add_vcl_access(knl, inner_iname, vcl_size, level) - knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) - if has_tail and vectorize_tail: - vcl_size = vcl_size // 2 - vec_iname = tail_iname + if has_tail and vectorize_tail: + from pudb import set_trace; set_trace() + knl = _do_vectorization(knl, tail_iname, vcl_size // 2, level + 1) - knl = lp.split_iname(knl, vec_iname, vcl_size) - knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) - - array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] - knl = lp.split_array_axis(knl, array_alias, 1, vcl_size) + return knl - knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size, levels=1) + knl = _do_vectorization(knl, orig_iname, vcl_size) return knl -- GitLab