From 161d10b4f94dbbef2a43c52d905d44ff96a2f9c0 Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Thu, 14 Feb 2019 12:10:52 +0100 Subject: [PATCH] (unvectorized) tail works with vectorization --- .../codegen/blockstructured/vectorization.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 9afe9baf..95298bb1 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -2,7 +2,7 @@ import loopy as lp import numpy as np import pymbolic.primitives as prim -from loopy.match import Tagged, Id, Writes, Or, Iname +from loopy.match import Tagged, Id, Writes, And, Or, Iname, All from islpy import BasicSet from dune.codegen.generation import get_global_context_value @@ -172,8 +172,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): def add_vcl_access(knl, iname_inner): from loopy.match import Reads, Tagged - accum_insns = set((insn.id for insn in lp.find_instructions(knl, Tagged('accum')))) - read_insns = set((insn.id for insn in lp.find_instructions(knl, Reads('*alias')))) + accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner)))))) + read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner)))))) vectorized_insns = accum_insns | read_insns from loopy.symbolic import CombineMapper @@ -260,37 +260,35 @@ def add_vcl_access(knl, iname_inner): dim = world_dimension() dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)] # remove CInstructions since loopy extract expects to get only assignments - knl_without_cinsn = knl.copy(instructions=[insn for insn in knl.instructions - if not isinstance(insn, lp.CInstruction)]) + knl_with_subst_insns = knl.copy(instructions=[insn for insn in lp.find_instructions(knl, Iname(iname_inner)) + if not isinstance(insn, lp.CInstruction)]) for alias in vector_alias: # Rename lhs which would match the substitution rule since loopy doesn't want substitutions as lhs new_insns = [] - for insn in knl_without_cinsn.instructions: + for insn in knl_with_subst_insns.instructions: if isinstance(insn, lp.Assignment) and isinstance(insn.assignee, prim.Subscript): if insn.assignee.aggregate.name == alias: new_insns.append(insn.copy(assignee=prim.Subscript(prim.Variable('dummy_' + alias), insn.assignee.index_tuple))) - pass else: new_insns.append(insn) else: new_insns.append(insn) - knl_without_cinsn = knl_without_cinsn.copy(instructions=new_insns) + knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns) # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \ ',ix,' + ','.join(['i' + d for d in dim_names[1:dim]]) - knl_without_cinsn = lp.extract_subst(knl_without_cinsn, alias + '_subst', '{}[{}]'.format(alias, parameters), + knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters), parameters=parameters) - new_subst = knl_without_cinsn.substitutions.copy() + new_subst = knl_with_subst_insns.substitutions.copy() rule = new_subst[alias + '_subst'] rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),)) - knl_without_cinsn = knl_without_cinsn.copy(substitutions=new_subst) + knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst) - from loopy.match import All - knl_without_cinsn = lp.expand_subst(knl_without_cinsn, All()) - knl = knl_without_cinsn.copy(instructions=knl_without_cinsn.instructions + [insn for insn in knl.instructions - if isinstance(insn, lp.CInstruction)]) + knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner)) + knl = knl.copy(instructions=knl_with_subst_insns.instructions + + [insn for insn in knl.instructions if insn.id not in knl_with_subst_insns.id_to_insn]) # add store and load dependencies and set right accumulation assignee new_insns = [] -- GitLab