diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index ac3a58db7bb8f66c40e154ade21c937504bdffce..c3a4b81f907bfc3c9eec8496ec8cd238dd56e4e7 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -2,7 +2,7 @@ import loopy as lp import numpy as np import pymbolic.primitives as prim -from loopy.match import Tagged, Id, Writes, And, Or, Iname, All +from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All from islpy import BasicSet from dune.codegen.generation import get_global_context_value @@ -15,25 +15,25 @@ from dune.codegen.pdelab.geometry import world_dimension from dune.codegen.tools import get_pymbolic_basename -def add_vcl_temporaries(knl): +def add_vcl_temporaries(knl, vcl_size): vector_alias = [a for a in knl.arg_dict if a.endswith('alias')] # add new temporaries for vectors # hope one read insn doesn't have two different reads from the same temporary new_vec_temporaries = dict() new_insns = [] - init_iname = 'init_vec' + init_iname = 'init_vec{}'.format(vcl_size) from islpy import BasicSet init_domain = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(init_iname, get_vcl_type_size(dtype_floatingpoint()))) for alias in vector_alias: - vector_name = alias.replace('alias', 'vec') + vector_name = alias.replace('alias', 'vec{}'.format(vcl_size)) new_vec_temporaries[vector_name] = DuneTemporaryVariable(vector_name, dtype=np.float64, - shape=(get_vcl_type_size(np.float64),), managed=True, + shape=(vcl_size,), managed=True, scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) # write once to the vector such that loopy won't complain new_insns.append(lp.Assignment(assignee=prim.Subscript(prim.Variable(vector_name), prim.Variable(init_iname)), expression=0, within_inames=frozenset({init_iname}), - id='init_' + vector_name)) + id='init_{}'.format(vector_name))) from loopy.kernel.data import VectorizeTag return knl.copy(instructions=knl.instructions + new_insns, domains=knl.domains + [init_domain], @@ -41,9 +41,8 @@ def add_vcl_temporaries(knl): iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()})) -def add_vcl_accum_insns(knl, iname_inner, iname_outer): +def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size): nptype = dtype_floatingpoint() - vcl_size = get_vcl_type_size(np.float64) accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner)))) accum_ids = [insn.id for insn in accum_insns] @@ -59,7 +58,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee)) inames_micro = set((i for i in insn.within_inames if i.startswith('micro'))) - iname_ix = next((i for i in inames_micro if i.endswith("_x"))) + iname_ix = next((i for i in inames_micro if '_x' in i)) # need inames for head and tail handling a priori from loopy.match import Not, All @@ -76,8 +75,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): inames_tail = frozenset((var.name for var in replace_tail_inames.values())) # erstelle a[iy] und b - identifier_left = vng('left_node') - identifier_right = vng('right_node') + identifier_left = vng('left_node_vec{}'.format(vcl_size)) + identifier_right = vng('right_node_vec{}'.format(vcl_size)) new_vec_temporaries[identifier_left] = DuneTemporaryVariable(identifier_left, dtype=np.float64, shape=(2,) * (world_dimension() - 1) + (vcl_size,), @@ -100,7 +99,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): id=id_init_a, within_inames=(insn.within_inames - frozenset({iname_outer}) - inames_micro) | inames_head, - tags=frozenset({'head'}))) + tags=frozenset({'head_vec{}'.format(vcl_size)}))) # setze werte für a und b expr_right = substitute(expr_without_r, {iname_ix: 1}) @@ -131,7 +130,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): depends_on=insn.depends_on | frozenset({id_set_left, id_init_a, id_set_right}), within_inames=insn.within_inames - frozenset({iname_ix}), - tags=frozenset({'accum'}))) + tags=frozenset({'accum_vec{}'.format(vcl_size)}))) # a[iy] = permute id_permute = idg('{}_permute'.format(insn.id)) expr_permute = prim.Call(VCLPermute(nptype, vcl_size, (vcl_size - 1,) + (-1,) * (vcl_size - 1)), @@ -162,7 +161,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): frozenset(write_to_tail_ids)), within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer}) - inames_micro) | inames_tail, - tags=frozenset({'tail'}))) + tags=frozenset({'tail_vec{}'.format(vcl_size)}))) else: if insn.id.endswith('tail') and insn.id.replace('_tail', '') in accum_ids: accum_id = insn.id.replace('_tail', '') @@ -174,12 +173,16 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries)) -def add_vcl_access(knl, iname_inner): +def add_vcl_access(knl, iname_inner, vcl_size): from loopy.match import Reads, Tagged - accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner)))))) + accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)), + Iname(iname_inner)))))) read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner)))))) vectorized_insns = accum_insns | read_insns + alias_suffix = 'alias' + vector_sufix = 'vec{}'.format(vcl_size) + from loopy.symbolic import CombineMapper from loopy.symbolic import IdentityMapper @@ -195,7 +198,7 @@ def add_vcl_access(knl, iname_inner): map_loopy_function_identifier = map_constant def map_subscript(self, expr): - if expr.aggregate.name.endswith('alias'): + if expr.aggregate.name.endswith(alias_suffix): return expr.aggregate, expr.index_tuple else: return tuple() @@ -213,7 +216,7 @@ def add_vcl_access(knl, iname_inner): alias, index = aic(insn.expression) name_alias = alias.name - name_vec = name_alias.replace('alias', 'vec') + name_vec = name_alias.replace(alias_suffix, vector_sufix) vectorized_insn_to_vector_names[id] = (name_alias, name_vec) # compute index without vec iname @@ -240,7 +243,7 @@ def add_vcl_access(knl, iname_inner): alias, index = aic(insn.expression) name_alias = alias.name - name_vec = name_alias.replace('alias', 'vec') + name_vec = name_alias.replace(alias_suffix, vector_sufix) vectorized_insn_to_vector_names[id] = (name_alias, name_vec) # flat index without vec iname @@ -260,7 +263,7 @@ def add_vcl_access(knl, iname_inner): write_ids))) # replace alias with vcl vector, except for accumulation assignee - vector_alias = [a for a in knl.arg_dict if a.endswith('alias')] + vector_alias = [a for a in knl.arg_dict if a.endswith(alias_suffix)] dim = world_dimension() dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)] # remove CInstructions since loopy extract expects to get only assignments @@ -287,7 +290,8 @@ def add_vcl_access(knl, iname_inner): parameters=parameters) new_subst = knl_with_subst_insns.substitutions.copy() rule = new_subst[alias + '_subst'] - rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),)) + rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)), + (prim.Variable('ex_i'),)) knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst) knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner)) @@ -308,7 +312,7 @@ def add_vcl_access(knl, iname_inner): try: assignee_vec = next((expr for expr in insn.expression.children if isinstance(expr, prim.Subscript) and - expr.aggregate.name.replace('vec', 'alias') == + expr.aggregate.name.replace(vector_sufix, alias_suffix) == assignee_alias.aggregate.name.replace('dummy_', ''))) except StopIteration: from dune.codegen.error import CodegenVectorizationError @@ -431,10 +435,10 @@ def vectorize_micro_elements(knl): if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell': vcl_size = get_vcl_type_size(np.float64) - knl = add_iname_array(knl, vec_iname) + has_tail = get_form_option('number_of_blocks') % vcl_size > 0 # manually add tail, since split_iname with slabs tries to vectorize the tail - if get_form_option('number_of_blocks') % vcl_size > 0: + if has_tail: vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size from loopy.kernel.tools import DomainChanger domch = DomainChanger(knl, (vec_iname,)) @@ -448,12 +452,14 @@ def vectorize_micro_elements(knl): knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')]) + knl = add_iname_array(knl, vec_iname) + array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')] iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')] knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size) knl = lp.tag_array_axes(knl, iname_vector, ('c', 'vec')) - knl = add_vcl_temporaries(knl) - knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer') - knl = add_vcl_access(knl, vec_iname + '_inner') + knl = add_vcl_temporaries(knl, vcl_size) + knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size) + knl = add_vcl_access(knl, vec_iname + '_inner', vcl_size) return knl