Skip to content
Snippets Groups Projects
Commit 80065e8f authored by Marcel Koch's avatar Marcel Koch
Browse files

removes alias stuff

parent 323c8beb
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,6 @@ import dune.perftool.blockstructured.basis
from dune.perftool.options import get_option
from dune.perftool.pdelab.quadrature import pymbolic_quadrature_position
# from dune.perftool.pdelab.geometry import to_global
from dune.perftool.blockstructured.spaces import lfs_inames
from dune.perftool.blockstructured.basis import (pymbolic_reference_gradient,
pymbolic_basis)
......@@ -27,14 +26,6 @@ class BlockStructuredInterface(PDELabInterface):
#
# Local function space related generator functions
#
def generate_accumulation_instruction(self, expr, visitor):
    """Emit the accumulation instruction for ``expr``.

    Delegates to the blockstructured implementation when the
    ``vectorization_blockstructured`` option is set and to the generic
    PDELab local operator implementation otherwise.

    :arg expr: the expression to accumulate, passed through unchanged
    :arg visitor: the visitor, passed through unchanged
    :returns: whatever the selected implementation returns
    """
    # Each implementation is imported only in the branch that uses it.
    if not get_option("vectorization_blockstructured"):
        from dune.perftool.pdelab.localoperator import generate_accumulation_instruction as emit
        return emit(expr, visitor)

    from dune.perftool.blockstructured.accumulation import generate_accumulation_instruction as emit
    return emit(expr, visitor)
# TODO: prepending the sub-element inames here is a stopgap, not really ideal
def lfs_inames(self, element, restriction, number=None, context=''):
    """Return the sub-element inames followed by the regular lfs inames.

    The arguments are forwarded unchanged to the module-level
    ``lfs_inames`` function; its result is appended to the result of
    ``sub_element_inames()``.
    """
    subelem = sub_element_inames()
    regular = lfs_inames(element, restriction, number, context)
    return subelem + regular
......
from dune.perftool.generation import temporary_variable, instruction
from dune.perftool.options import get_option
from dune.perftool.pdelab.localoperator import determine_accumulation_space
from dune.perftool.pdelab.argument import name_accumulation_variable
from dune.perftool.pdelab.localoperator import boundary_predicates
import pymbolic.primitives as prim
def define_accumulation_alias(container, accumspace):
    """Declare ``<container>_alias`` and point it at the accumulation container.

    Registers a managed temporary named ``<container>_alias`` on the base
    storage ``dummy_<container>`` and then emits one instruction, outside
    of all inames, whose C code assigns the address of
    ``<container>.container()(<lfs name>, 0)`` to that temporary.

    :arg container: name of the accumulation variable
    :arg accumspace: object providing ``element.degree()`` and ``lfs.name``
    """
    alias = container + "_alias"
    blocks = get_option("number_of_blocks")
    degree = accumspace.element.degree()

    # Shape and strides are built from the block count and the element degree.
    temporary_variable(alias,
                       shape=(blocks + 1, blocks + 1, degree + 1, degree + 1),
                       strides=(degree, degree * blocks + 1, 1, blocks + 1),
                       base_storage="dummy_" + container,
                       managed=True,
                       _base_storage_access_may_be_aliasing=True)

    instruction(within_inames=frozenset(),
                code="{} = &{}.container()({},0);".format(alias, container, accumspace.lfs.name),
                read_variables=frozenset({container}),
                assignees=frozenset({alias}))
def name_accumulation_alias(accumvar, accumspace):
    """Return the alias name for ``accumvar`` after making sure it is defined.

    :arg accumvar: name of the accumulation variable
    :arg accumspace: forwarded to ``define_accumulation_alias``
    :returns: ``accumvar`` with the suffix ``_alias``
    """
    alias = accumvar + "_alias"
    define_accumulation_alias(accumvar, accumspace)
    return alias
def generate_accumulation_instruction(expr, visitor):
    """Emit ``alias[inames] = expr + alias[inames]`` for the accumulation alias.

    :arg expr: the expression that is added to the accumulation alias
    :arg visitor: provides ``test_info``, ``trial_info``, ``measure``,
        ``subdomain_id`` and ``interface``
    """
    # Accumulation spaces: test space (argument 0) and, for the jacobian
    # case, the ansatz space (argument 1).
    test_space = determine_accumulation_space(visitor.test_info, 0)
    ansatz_space = determine_accumulation_space(visitor.trial_info, 1)

    # The accumulation variable is named after both restrictions; its alias
    # is defined with respect to the test space.
    restrictions = test_space.get_restriction() + ansatz_space.get_restriction()
    accumvar = name_accumulation_variable(restrictions)
    alias_name = name_accumulation_alias(accumvar, test_space)

    predicates = boundary_predicates(expr, visitor.measure, visitor.subdomain_id)
    quad_inames = visitor.interface.quadrature_inames()

    # Index the alias by the test inames, followed by the trial inames if a
    # trial function is present.
    index_inames = visitor.test_info.inames
    if visitor.trial_info:
        index_inames = index_inames + visitor.trial_info.inames

    target = prim.Subscript(prim.Variable(alias_name),
                            tuple(map(prim.Variable, index_inames)))

    instruction(assignee=target,
                expression=prim.Sum((expr, target)),
                forced_iname_deps=frozenset(index_inames) | frozenset(quad_inames),
                forced_iname_deps_is_final=True,
                predicates=predicates)
......@@ -9,24 +9,6 @@ import pymbolic.primitives as prim
import loopy as lp
def define_alias(container, lfs, element):
    """Declare ``<container>_alias`` and point it at the coefficient container.

    Registers a managed temporary named ``<container>_alias`` on the base
    storage ``dummy_<container>`` and then emits one instruction, outside
    of all inames, whose C code assigns the address of
    ``<container>(<lfs name>, 0)`` to that temporary.

    :arg container: name of the coefficient container
    :arg lfs: object providing ``name``
    :arg element: object providing ``degree()``
    """
    alias = container + "_alias"
    blocks = get_option("number_of_blocks")
    degree = element.degree()

    # Shape and strides are built from the block count and the element degree.
    temporary_variable(alias,
                       shape=(blocks + 1, blocks + 1, degree + 1, degree + 1),
                       strides=(degree, degree * blocks + 1, 1, blocks + 1),
                       base_storage="dummy_" + container,
                       managed=True,
                       _base_storage_access_may_be_aliasing=True)

    instruction(within_inames=frozenset(),
                code="{} = &{}({},0);".format(alias, container, lfs.name),
                read_variables=frozenset({container}),
                assignees=frozenset({alias}))
def name_alias(container, lfs, element):
    """Define the alias for ``container`` and return its name.

    :arg container: name of the coefficient container
    :arg lfs: forwarded to ``define_alias``
    :arg element: forwarded to ``define_alias``
    :returns: ``container`` with the suffix ``_alias``
    """
    define_alias(container, lfs, element)
    return container + "_alias"
# TODO remove the need for element
@kernel_cached
@backend(interface="pymbolic_coefficient", name="blockstructured")
......@@ -40,9 +22,4 @@ def pymbolic_coefficient(container, lfs, element, index):
lfs = prim.Variable(lfs)
# use higher order FEM index instead of Q1 index
if get_option("vectorization_blockstructured"):
subelem_inames = sub_element_inames()
coeff_alias = name_alias(container, lfs, element)
return prim.Subscript(lp.TaggedVariable(coeff_alias, 'coeff_alias'), tuple(prim.Variable(i) for i in subelem_inames+index))
else:
return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),))
return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),))
......@@ -4,8 +4,9 @@ import pymbolic.primitives as prim
from dune.perftool.loopy.temporary import DuneTemporaryVariable
from dune.perftool.loopy.symbolic import substitute
from dune.perftool.options import get_option
from dune.perftool.pdelab.argument import PDELabAccumulationFunction
from dune.perftool.pdelab.argument import PDELabAccumulationFunction, CoefficientAccess
from dune.perftool.tools import get_pymbolic_indices
from loopy.symbolic import SubstitutionMapper
def add_vcl_vector(knl, iname_inner, iname_outer):
......@@ -16,28 +17,32 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
from loopy.symbolic import WalkMapper
class FindAlias(WalkMapper):
def __init__(self, id, expr, vecs, insn_to_vec):
def __init__(self, id, expr):
self.id = id
self.vecs = vecs
self.insn_to_vec = insn_to_vec
self.rec(expr)
def post_visit(self,expr):
if isinstance(expr, prim.Subscript) \
and isinstance(expr.aggregate, prim.Variable) \
and expr.aggregate.name.endswith('alias'):
vec_name = expr.aggregate.name.replace('alias', 'vec')
self.vecs.add(vec_name)
self.insn_to_vec.setdefault(self.id, set())
self.insn_to_vec[self.id].add((vec_name,expr))
if isinstance(expr, prim.Call):
func = expr.function
param = expr.parameters
if (isinstance(func, CoefficientAccess) or isinstance(func, PDELabAccumulationFunction)) \
and iname_inner in get_pymbolic_indices(prim.Subscript((), (param[1],))):
lfs = param[0].name
vec_name = (func.container if isinstance(func, CoefficientAccess) else func.accumobj) + "_" + lfs + "_vec"
read_vecs.add(vec_name)
read_insn_to_vec_instance.setdefault(self.id, set())
read_insn_to_vec_instance[self.id].add((vec_name, expr))
if isinstance(func, PDELabAccumulationFunction):
write_vecs.add(vec_name)
write_insn_to_vec_instance[self.id] = (vec_name, expr)
return
# find all instances where an alias is read from or written to
# save the name of the corresponding vector and the alias subscript expression
for insn in knl.instructions:
if isinstance(insn, lp.MultiAssignmentBase):
FindAlias(insn.id, insn.expression, read_vecs, read_insn_to_vec_instance)
if isinstance(insn, lp.Assignment):
FindAlias(insn.id, insn.assignee, write_vecs, write_insn_to_vec_instance)
FindAlias(insn.id, insn.expression)
read_insns = [knl.id_to_insn[id] for id in read_insn_to_vec_instance]
write_insns = [knl.id_to_insn[id] for id in write_insn_to_vec_instance]
......@@ -52,14 +57,13 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
modified_accum_insn = []
replace_accum_insn = dict()
for insn in write_insns:
if isinstance(insn, lp.Assignment):
expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee))
if expr_without_r == insn.expression:
continue
if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction):
vec_name, expr = write_insn_to_vec_instance[insn.id]
expr_accum = insn.expression.parameters[-1]
# finde iname, der auf x endet
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x")))
iname_iy = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_y")))
iname_ey = next((i for i in insn.within_inames if i.startswith('subel') and i.endswith("_y")))
# erstelle a[iy] und b
new_vec_temporaries['a_iy'] = DuneTemporaryVariable('a', dtype=np.float64, shape=(2, 4,), managed=True,
......@@ -71,7 +75,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
b = prim.Subscript(prim.Variable('b'), (prim.Variable(iname_inner),))
# init a
modified_accum_insn.append(lp.Assignment(assignee=substitute(a,{iname_iy:prim.Variable(iname_iy+'_head')}),
modified_accum_insn.append(lp.Assignment(assignee=substitute(a, {iname_iy: prim.Variable(iname_iy+'_head')}),
expression=0,
id='insn_init_a',
within_inames=(insn.within_inames-frozenset({iname_ix, iname_iy,
......@@ -81,8 +85,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
)
# setze werte für a und b
expr_b = substitute(expr_without_r, {iname_ix:1})
expr_a = prim.Sum((substitute(expr_without_r, {iname_ix:0}), a))
expr_b = substitute(expr_accum, {iname_ix: 1})
expr_a = prim.Sum((substitute(expr_accum, {iname_ix: 0}), a))
modified_accum_insn.append(lp.Assignment(assignee=b,
expression=expr_b,
......@@ -100,10 +104,10 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
)
# r+=a[iy]
expr_accum = prim.Sum((a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (b,)),
substitute(insn.assignee,{iname_ix:0})))
replace_accum_insn[insn.id] = lp.Assignment(assignee=substitute(insn.assignee,{iname_ix:0}),
expression=expr_accum,
r_vec = prim.Subscript(prim.Variable(vec_name),(prim.Variable(iname_inner),))
expr_accum_mod = prim.Sum((a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (b,)), r_vec))
replace_accum_insn[insn.id] = lp.Assignment(assignee=r_vec,
expression=expr_accum_mod,
id='insn_mod_accum',
depends_on=insn.depends_on|frozenset({'insn_b', 'insn_init_a',
'insn_a_iy'}),
......@@ -115,39 +119,25 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
expression=expr_permute,
id='insn_permute',
depends_on=replace_accum_insn[insn.id].depends_on
|frozenset({replace_accum_insn[insn.id].id}),
|frozenset({replace_accum_insn[insn.id].id, "insn_b"}),
within_inames=insn.within_inames-frozenset({iname_ix})
)
)
# tail handling
assignee_tail = prim.Subscript(prim.Variable(insn.assignee.aggregate.name),
(get_option("number_of_blocks"),
prim.Variable(iname_ey),
0,
prim.Variable(iname_iy+'_tail')))
expr_tail = prim.Sum((prim.Subscript(prim.Variable('a'), (prim.Variable(iname_iy+'_tail'), 0)),
assignee_tail))
modified_accum_insn.append(lp.Assignment(assignee=assignee_tail,
expression=expr_tail,
id='insn_tail',
depends_on=frozenset({replace_accum_insn[insn.id].id,
'insn_permute', 'insn_a_iy', 'insn_a_init'}),
within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer,
iname_ix, iname_iy}))
| frozenset({iname_iy+'_tail'})))
# update found instances with respect to modified accumulation instruction
def update_insn_to_vec_instance(insns, insn_to_vec_instance):
for insn in insns:
if insn.id in replace_accum_insn:
insns.remove(insn)
new_insn = replace_accum_insn[insn.id]
insns.append(new_insn)
insn_to_vec_instance.pop(insn.id)
FindAlias(new_insn.id, new_insn.expression, set(), insn_to_vec_instance)
update_insn_to_vec_instance(read_insns, read_insn_to_vec_instance)
update_insn_to_vec_instance(write_insns, write_insn_to_vec_instance)
subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")/4, iname_iy: prim.Variable(iname_iy+'_tail'),
iname_ix: 0}
expr_tail = prim.Call(expr.function, tuple(substitute(p, subst_map) for p in expr.parameters[:-1])
+ (prim.Subscript(prim.Variable('a'), (prim.Variable(iname_iy+'_tail'), 0)),))
modified_accum_insn.append(lp.CallInstruction(assignees=(),
expression=expr_tail,
id='insn_tail',
depends_on=frozenset({replace_accum_insn[insn.id].id,
'insn_permute', 'insn_a_iy', 'insn_a_init'}),
within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer,
iname_ix, iname_iy}))
| frozenset({iname_iy+'_tail'})))
# add load instructions if the vector is read, based on the read instruction
# TODO brauche mehrere vectoren, falls in einer insn von einem alias mit unterschiedlichen idx gelesen wird
......@@ -156,18 +146,23 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
read_dependencies = dict()
for insn in read_insns:
for vec, expr in read_insn_to_vec_instance[insn.id]:
alias = vec.replace('vec', 'alias')
# different code for accumulation variable
if isinstance(expr.function, PDELabAccumulationFunction):
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x")))
index = substitute(expr.parameters[1], {iname_inner:0, iname_ix: 0})
code = "{}.load(&{}.container()({}, {}));".format(vec, expr.function.accumobj, expr.parameters[0], index)
within_inames = insn.within_inames-frozenset({iname_ix})
else:
# flat index without vec iname
# add load instruction
index = substitute(expr.parameters[1], {iname_inner:0})
code = "{}.load(&{}({}, {}));".format(vec, expr.function.name, expr.parameters[0], index)
within_inames = insn.within_inames|insn.reduction_inames()
# flat index without vec iname
strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags)
index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides)))
# add load instruction
code = "{}.load({} + {});".format(vec, alias, index)
load_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_load')
load_insns.append(lp.CInstruction(iname_exprs=[], code=code, read_variables=frozenset({alias}),
within_inames=insn.within_inames|insn.reduction_inames(),
#assignees=(lp.TaggedVariable(vec, 'coeff_vec'), ), #sonst denkt looy das müsste ein array von Vec4d sein...
load_insns.append(lp.CInstruction(iname_exprs=[], code=code,
within_inames=within_inames,
#assignees=(lp.Variable(vec), ), # sonst denkt looy das müsste ein array von Vec4d sein...
id=load_id))
read_dependencies.setdefault(insn.id, set())
read_dependencies[insn.id].add(load_id)
......@@ -175,43 +170,53 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
# add store instructions if the vector is written, based on the write instruction
store_insns = []
for insn in write_insns:
for vec, expr in write_insn_to_vec_instance[insn.id]:
alias = vec.replace('vec', 'alias')
# flat index without vec iname
strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags)
index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides)))
# add store instruction
code = "{}.store({} + {});".format(vec, alias, index)
store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store')
store_insns.append(lp.CInstruction(iname_exprs=[], code=code, read_variables=frozenset({alias}),
within_inames=insn.within_inames,
depends_on=insn.depends_on
| frozenset({insn.id})
| read_dependencies[insn.id],
id=store_id))
# exchange alias for vector
new_insns = []
vec, expr = write_insn_to_vec_instance[insn.id]
# flat index without vec iname
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x")))
index = substitute(expr.parameters[1], {iname_inner:0, iname_ix: 0})
# add store instruction
code = "{}.store(&{}.container()({}, {}));".format(vec, expr.function.accumobj, expr.parameters[0], index)
store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store')
store_insns.append(lp.CInstruction(iname_exprs=[], code=code,
within_inames=insn.within_inames-frozenset({iname_ix}),
depends_on=insn.depends_on
| frozenset({replace_accum_insn[insn.id].id})
| read_dependencies[insn.id],
id=store_id))
new_insns = []
for insn in knl.instructions:
if insn.id in replace_accum_insn:
insn = replace_accum_insn[insn.id]
if insn.id not in read_insn_to_vec_instance.keys() | write_insn_to_vec_instance.keys():
new_insns.append(insn)
else:
subst_map = dict()
for vec, expr in read_insn_to_vec_instance[insn.id]:
subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),))
new_insn = insn
if insn in read_insns:
new_insn = new_insn.copy(expression=substitute(new_insn.expression, subst_map),
depends_on=new_insn.depends_on|read_dependencies[insn.id])
if insn in write_insns:
new_insn = new_insn.copy(assignee=substitute(new_insn.assignee, subst_map))
if insn.id in replace_accum_insn:
new_insn = replace_accum_insn[insn.id].copy(depends_on=replace_accum_insn[insn.id].depends_on
|read_dependencies[insn.id])
else:
subst_map = dict()
for vec, expr in read_insn_to_vec_instance[insn.id]:
subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),))
class NodeSubstitutor(SubstitutionMapper):
    """Substitution mapper that can replace whole call nodes.

    The substitution function is built from the enclosing ``subst_map``.
    A call expression found in that map is replaced by its mapped value;
    any other call is handled by ``SubstitutionMapper.map_call``.
    """

    def __init__(self):
        from pymbolic.mapper.substitutor import make_subst_func
        self.subst_func = make_subst_func(subst_map)

    def map_call(self, expr, *args, **kwargs):
        """Return the replacement for ``expr`` or fall back to the base class."""
        result = self.subst_func(expr)
        if result is not None:
            return result

        # Forward the extra arguments unpacked. Passing ``args`` and
        # ``kwargs`` as two positional values would hand a tuple and a dict
        # to every child mapper method reached from the base class.
        return SubstitutionMapper.map_call(self, expr, *args, **kwargs)
new_insn = insn
if insn in read_insns:
new_insn = new_insn.copy(expression=NodeSubstitutor()(new_insn.expression),
depends_on=new_insn.depends_on|read_dependencies[insn.id])
if insn in write_insns:
new_insn = new_insn.copy(assignee=NodeSubstitutor()(new_insn.assignee))
new_insns.append(new_insn)
from loopy.kernel.creation import resolve_dependencies
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment