From cdd98d129413c807255f00ab566345c973dc802d Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Fri, 15 Feb 2019 16:03:51 +0100
Subject: [PATCH] refactoring

---
 .../codegen/blockstructured/vectorization.py | 97 ++++++++++---------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index cfdfc794..d5224e6f 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -488,6 +488,54 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
     return lp.make_reduction_inames_unique(knl)
 
 
+def do_vectorization(knl, orig_iname, vec_iname, iname_bound, vcl_size, level=0):
+    inner_iname = vec_iname + '_inner'
+    outer_iname = vec_iname + '_outer'
+
+    tail_size = iname_bound % vcl_size
+    if get_form_option('vectorization_blockstructured_tail'):
+        tail_vcl_size = vcl_size
+        while tail_vcl_size > tail_size:
+            tail_vcl_size = tail_vcl_size // 2
+        vectorize_tail = tail_vcl_size > 1
+    else:
+        vectorize_tail = False
+
+    # manually add tail, since split_iname with slabs tries to vectorize the tail
+    if tail_size > 0:
+        # fake suitable loop bound
+        vectorizable_bound = (iname_bound // vcl_size) * vcl_size
+        from loopy.kernel.tools import DomainChanger
+        domch = DomainChanger(knl, (vec_iname,))
+        knl = knl.copy(domains=domch.get_domains_with(
+            BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
+
+        knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
+
+        tail_iname = vec_iname + '_inner' + '_tail'
+        knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
+    else:
+        knl = lp.split_iname(knl, vec_iname, vcl_size)
+
+    knl = lp.tag_inames(knl, [(inner_iname, 'vec')])
+
+    array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
+    knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
+
+    knl = add_vcl_temporaries(knl, vcl_size)
+    knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
+    knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
+    knl = add_vcl_access(knl, inner_iname, vcl_size, level)
+
+    if tail_size > 0:
+        knl = lp.add_dependency(knl, And((Tagged('tail_{}'.format(level)), Not(Tagged('head*')))),
+                                Tagged('vectorized_{}'.format(level)))
+        if vectorize_tail:
+            knl = do_vectorization(knl, orig_iname, tail_iname, tail_size, tail_vcl_size, level + 1)
+
+    return knl
+
+
 def vectorize_micro_elements(knl):
     vec_iname = "subel_x"
     orig_iname = vec_iname
@@ -495,52 +543,5 @@ def vectorize_micro_elements(knl):
     vcl_size = get_vcl_type_size(np.float64)
     knl = add_iname_array(knl, vec_iname)
 
-    def _do_vectorization(knl, vec_iname, iname_bound, vcl_size, level=0):
-        inner_iname = vec_iname + '_inner'
-        outer_iname = vec_iname + '_outer'
-
-        tail_size = iname_bound % vcl_size
-        if get_form_option('vectorization_blockstructured_tail'):
-            tail_vcl_size = vcl_size
-            while tail_vcl_size > tail_size:
-                tail_vcl_size = tail_vcl_size // 2
-            vectorize_tail = tail_vcl_size > 1
-        else:
-            vectorize_tail = False
-
-        # manually add tail, since split_iname with slabs tries to vectorize the tail
-        if tail_size > 0:
-            # fake suitable loop bound
-            vectorizable_bound = (iname_bound // vcl_size) * vcl_size
-            from loopy.kernel.tools import DomainChanger
-            domch = DomainChanger(knl, (vec_iname,))
-            knl = knl.copy(domains=domch.get_domains_with(
-                BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
-
-            knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
-
-            tail_iname = vec_iname + '_inner' + '_tail'
-            knl = realize_tail(knl, inner_iname, outer_iname, iname_bound, tail_iname, vcl_size, level)
-        else:
-            knl = lp.split_iname(knl, vec_iname, vcl_size)
-
-        knl = lp.tag_inames(knl, [(inner_iname, 'vec')])
-
-        array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
-        knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
-
-        knl = add_vcl_temporaries(knl, vcl_size)
-        knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size, level)
-        knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level)
-        knl = add_vcl_access(knl, inner_iname, vcl_size, level)
-
-        if tail_size > 0:
-            knl = lp.add_dependency(knl, And((Tagged('tail_{}'.format(level)), Not(Tagged('head*')))),
-                                    Tagged('vectorized_{}'.format(level)))
-            if vectorize_tail:
-                knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1)
-
-        return knl
-
-    knl = _do_vectorization(knl, orig_iname, get_form_option('number_of_blocks'), vcl_size)
+    knl = do_vectorization(knl, orig_iname, orig_iname, get_form_option('number_of_blocks'), vcl_size)
     return knl
-- 
GitLab