Skip to content
Snippets Groups Projects
Commit ef9e2a22 authored by Marcel Koch
Browse files

add dependencies to reduce loopy warnings

parent 21f86f45
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,8 @@ from dune.perftool.generation.loopy import function_mangler, globalarg
import loopy as lp
import pymbolic.primitives as prim
from loopy.match import Writes
def name_accumulation_alias(container, accumspace):
name = container + "_" + accumspace.lfs.name + "_alias"
......@@ -64,5 +66,6 @@ def generate_accumulation_instruction(expr, visitor):
forced_iname_deps=frozenset(lfs_inames).union(frozenset(quad_inames)),
forced_iname_deps_is_final=True,
predicates=predicates,
tags=frozenset({'accum'})
tags=frozenset({'accum'}),
depends_on=frozenset({Writes(accumvar_alias)})
)
......@@ -2,17 +2,16 @@ import loopy as lp
import numpy as np
import pymbolic.primitives as prim
from loopy.match import Tagged, Id
from loopy.match import Tagged, Id, Writes, Or
from dune.perftool.generation import get_global_context_value
from dune.perftool.generation import get_global_context_value, silenced_warning
from dune.perftool.loopy.target import dtype_floatingpoint
from dune.perftool.loopy.temporary import DuneTemporaryVariable
from dune.perftool.loopy.symbolic import substitute
from dune.perftool.loopy.vcl import get_vcl_type_size, VCLPermute, VCLLoad, VCLStore
from dune.perftool.options import get_form_option
from dune.perftool.pdelab.argument import PDELabAccumulationFunction
from dune.perftool.pdelab.geometry import world_dimension
from dune.perftool.tools import get_pymbolic_indices
from dune.perftool.tools import get_pymbolic_basename
def add_vcl_temporaries(knl):
......@@ -152,10 +151,14 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
assignee_tail = substitute(insn.assignee, subst_map)
expr_tail = prim.Sum((substitute(var_left, {iname_inner: 0, **replace_tail_inames}), assignee_tail))
write_to_tail_ids = tuple(i.id for i in lp.find_instructions(knl,
Writes(get_pymbolic_basename(assignee_tail))))
new_insns.append(lp.Assignment(assignee=assignee_tail,
expression=expr_tail,
id=id_accum_tail,
depends_on=frozenset({id_accum, id_permute, id_set_left, id_init_a}),
depends_on=frozenset({id_accum, id_permute, id_set_left, id_init_a}) |
frozenset(write_to_tail_ids),
within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer}) -
inames_micro) | inames_tail,
tags=frozenset({'tail'})))
......@@ -170,6 +173,7 @@ def add_vcl_access(knl, iname_inner):
from loopy.match import Reads, Tagged
accum_insns = set((insn.id for insn in lp.find_instructions(knl, Tagged('accum'))))
read_insns = set((insn.id for insn in lp.find_instructions(knl, Reads('*alias'))))
vectorized_insns = accum_insns | read_insns
from loopy.symbolic import CombineMapper
from loopy.symbolic import IdentityMapper
......@@ -198,23 +202,29 @@ def add_vcl_access(knl, iname_inner):
aic = AliasIndexCollector()
load_insns = []
read_dependencies = dict()
vectorized_insn_to_vector_names = dict()
for id in read_insns:
insn = knl.id_to_insn[id]
alias, index = aic(insn.expression)
name_alias = alias.name
name_vec = name_alias.replace('alias', 'vec')
vectorized_insn_to_vector_names[id] = (name_alias, name_vec)
# compute index without vec iname
strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags)
index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides)
if i != 0 and i.name != iname_inner))
# find write insns
write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec)))))
# add load instruction
load_id = idg('insn_' + name_vec + '_load')
call_load = prim.Call(VCLLoad(name_vec), (prim.Sum((prim.Variable(name_alias), index)),))
load_insns.append(lp.CallInstruction(assignees=(), expression=call_load,
id=load_id, within_inames=insn.within_inames | insn.reduction_inames(),))
id=load_id, within_inames=insn.within_inames | insn.reduction_inames(),
depends_on=insn.depends_on | write_ids,))
read_dependencies.setdefault(id, set())
read_dependencies[id].add(load_id)
......@@ -226,18 +236,23 @@ def add_vcl_access(knl, iname_inner):
alias, index = aic(insn.expression)
name_alias = alias.name
name_vec = name_alias.replace('alias', 'vec')
vectorized_insn_to_vector_names[id] = (name_alias, name_vec)
# flat index without vec iname
strides = tuple(tag.stride for tag in knl.arg_dict[name_alias].dim_tags)
index = prim.Sum(tuple(prim.Product((i, s)) for i, s in zip(index, strides)
if i != 0 and i.name != iname_inner))
# find write insns
write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec)))))
# add store instruction
store_id = idg('insn_' + name_vec + '_store')
call_store = prim.Call(VCLStore(name_vec), (prim.Sum((prim.Variable(name_alias), index)),))
store_insns.append(lp.CallInstruction(assignees=(), expression=call_store,
id=store_id, within_inames=insn.within_inames,
depends_on=insn.depends_on | frozenset({id}) | read_dependencies[id]))
depends_on=insn.depends_on | frozenset({id}) | read_dependencies[id] |
write_ids))
# replace alias with vcl vector, except for accumulation assignee
vector_alias = [a for a in knl.arg_dict if a.endswith('alias')]
......@@ -279,9 +294,12 @@ def add_vcl_access(knl, iname_inner):
# add store and load dependencies and set right accumulation assignee
new_insns = []
for insn in knl.instructions:
if insn.id not in read_insns | accum_insns:
if insn.id not in vectorized_insns:
new_insns.append(insn)
else:
# find write insns
name_alias, name_vec = vectorized_insn_to_vector_names[insn.id]
write_ids = frozenset(i.id for i in lp.find_instructions(knl, Or((Writes(name_vec), Writes(name_vec)))))
if insn.id in accum_insns:
assignee_alias = insn.assignee
try:
......@@ -293,9 +311,11 @@ def add_vcl_access(knl, iname_inner):
from dune.perftool.error import PerftoolVectorizationError
raise PerftoolVectorizationError
new_insns.append(insn.copy(assignee=assignee_vec,
depends_on=insn.depends_on | read_dependencies[insn.id]))
depends_on=insn.depends_on | read_dependencies[insn.id] |
write_ids))
else:
new_insns.append(insn.copy(depends_on=insn.depends_on | read_dependencies[insn.id]))
new_insns.append(insn.copy(depends_on=insn.depends_on | read_dependencies[insn.id] |
write_ids))
return knl.copy(instructions=new_insns + load_insns + store_insns)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment