Skip to content
Snippets Groups Projects
Commit 161d10b4 authored by Marcel Koch
Browse files

(unvectorized) tail works with vectorization

parent f564f0cb
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,7 @@ import loopy as lp ...@@ -2,7 +2,7 @@ import loopy as lp
import numpy as np import numpy as np
import pymbolic.primitives as prim import pymbolic.primitives as prim
from loopy.match import Tagged, Id, Writes, Or, Iname from loopy.match import Tagged, Id, Writes, And, Or, Iname, All
from islpy import BasicSet from islpy import BasicSet
from dune.codegen.generation import get_global_context_value from dune.codegen.generation import get_global_context_value
...@@ -172,8 +172,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): ...@@ -172,8 +172,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
def add_vcl_access(knl, iname_inner): def add_vcl_access(knl, iname_inner):
from loopy.match import Reads, Tagged from loopy.match import Reads, Tagged
accum_insns = set((insn.id for insn in lp.find_instructions(knl, Tagged('accum')))) accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner))))))
read_insns = set((insn.id for insn in lp.find_instructions(knl, Reads('*alias')))) read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner))))))
vectorized_insns = accum_insns | read_insns vectorized_insns = accum_insns | read_insns
from loopy.symbolic import CombineMapper from loopy.symbolic import CombineMapper
...@@ -260,37 +260,35 @@ def add_vcl_access(knl, iname_inner): ...@@ -260,37 +260,35 @@ def add_vcl_access(knl, iname_inner):
dim = world_dimension() dim = world_dimension()
dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)] dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)]
# remove CInstructions since loopy extract expects to get only assignments # remove CInstructions since loopy extract expects to get only assignments
knl_without_cinsn = knl.copy(instructions=[insn for insn in knl.instructions knl_with_subst_insns = knl.copy(instructions=[insn for insn in lp.find_instructions(knl, Iname(iname_inner))
if not isinstance(insn, lp.CInstruction)]) if not isinstance(insn, lp.CInstruction)])
for alias in vector_alias: for alias in vector_alias:
# Rename lhs which would match the substitution rule since loopy doesn't want substitutions as lhs # Rename lhs which would match the substitution rule since loopy doesn't want substitutions as lhs
new_insns = [] new_insns = []
for insn in knl_without_cinsn.instructions: for insn in knl_with_subst_insns.instructions:
if isinstance(insn, lp.Assignment) and isinstance(insn.assignee, prim.Subscript): if isinstance(insn, lp.Assignment) and isinstance(insn.assignee, prim.Subscript):
if insn.assignee.aggregate.name == alias: if insn.assignee.aggregate.name == alias:
new_insns.append(insn.copy(assignee=prim.Subscript(prim.Variable('dummy_' + alias), new_insns.append(insn.copy(assignee=prim.Subscript(prim.Variable('dummy_' + alias),
insn.assignee.index_tuple))) insn.assignee.index_tuple)))
pass
else: else:
new_insns.append(insn) new_insns.append(insn)
else: else:
new_insns.append(insn) new_insns.append(insn)
knl_without_cinsn = knl_without_cinsn.copy(instructions=new_insns) knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns)
# substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner] # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner]
parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \ parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \
',ix,' + ','.join(['i' + d for d in dim_names[1:dim]]) ',ix,' + ','.join(['i' + d for d in dim_names[1:dim]])
knl_without_cinsn = lp.extract_subst(knl_without_cinsn, alias + '_subst', '{}[{}]'.format(alias, parameters), knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters),
parameters=parameters) parameters=parameters)
new_subst = knl_without_cinsn.substitutions.copy() new_subst = knl_with_subst_insns.substitutions.copy()
rule = new_subst[alias + '_subst'] rule = new_subst[alias + '_subst']
rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),)) rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),))
knl_without_cinsn = knl_without_cinsn.copy(substitutions=new_subst) knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst)
from loopy.match import All knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner))
knl_without_cinsn = lp.expand_subst(knl_without_cinsn, All()) knl = knl.copy(instructions=knl_with_subst_insns.instructions +
knl = knl_without_cinsn.copy(instructions=knl_without_cinsn.instructions + [insn for insn in knl.instructions [insn for insn in knl.instructions if insn.id not in knl_with_subst_insns.id_to_insn])
if isinstance(insn, lp.CInstruction)])
# add store and load dependencies and set right accumulation assignee # add store and load dependencies and set right accumulation assignee
new_insns = [] new_insns = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment