diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 9afe9baf1ef8d3c6ced1cc264da17ac38e17ea42..95298bb181cc78529789ef7856e24d921f21e90e 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -2,7 +2,7 @@ import loopy as lp import numpy as np import pymbolic.primitives as prim -from loopy.match import Tagged, Id, Writes, Or, Iname +from loopy.match import Tagged, Id, Writes, And, Or, Iname, All from islpy import BasicSet from dune.codegen.generation import get_global_context_value @@ -172,8 +172,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): def add_vcl_access(knl, iname_inner): from loopy.match import Reads, Tagged - accum_insns = set((insn.id for insn in lp.find_instructions(knl, Tagged('accum')))) - read_insns = set((insn.id for insn in lp.find_instructions(knl, Reads('*alias')))) + accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner)))))) + read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner)))))) vectorized_insns = accum_insns | read_insns from loopy.symbolic import CombineMapper @@ -260,37 +260,35 @@ def add_vcl_access(knl, iname_inner): dim = world_dimension() dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)] # remove CInstructions since loopy extract expects to get only assignments - knl_without_cinsn = knl.copy(instructions=[insn for insn in knl.instructions - if not isinstance(insn, lp.CInstruction)]) + knl_with_subst_insns = knl.copy(instructions=[insn for insn in lp.find_instructions(knl, Iname(iname_inner)) + if not isinstance(insn, lp.CInstruction)]) for alias in vector_alias: # Rename lhs which would match the substitution rule since loopy doesn't want substitutions as lhs new_insns = [] - for insn in knl_without_cinsn.instructions: + for insn in knl_with_subst_insns.instructions: if isinstance(insn, lp.Assignment) and isinstance(insn.assignee, prim.Subscript): if insn.assignee.aggregate.name == alias: new_insns.append(insn.copy(assignee=prim.Subscript(prim.Variable('dummy_' + alias), insn.assignee.index_tuple))) - pass else: new_insns.append(insn) else: new_insns.append(insn) - knl_without_cinsn = knl_without_cinsn.copy(instructions=new_insns) + knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \ ',ix,' + ','.join(['i' + d for d in dim_names[1:dim]]) - knl_without_cinsn = lp.extract_subst(knl_without_cinsn, alias + '_subst', '{}[{}]'.format(alias, parameters), + knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters), parameters=parameters) - new_subst = knl_without_cinsn.substitutions.copy() + new_subst = knl_with_subst_insns.substitutions.copy() rule = new_subst[alias + '_subst'] rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),)) - knl_without_cinsn = knl_without_cinsn.copy(substitutions=new_subst) + knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst) - from loopy.match import All - knl_without_cinsn = lp.expand_subst(knl_without_cinsn, All()) - knl = knl_without_cinsn.copy(instructions=knl_without_cinsn.instructions + [insn for insn in knl.instructions - if isinstance(insn, lp.CInstruction)]) + knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner)) + knl = knl.copy(instructions=knl_with_subst_insns.instructions + + [insn for insn in knl.instructions if insn.id not in knl_with_subst_insns.id_to_insn]) # add store and load dependencies and set right accumulation assignee new_insns = []