Skip to content
Snippets Groups Projects
Commit c15d1ef8 authored by Marcel Koch's avatar Marcel Koch
Browse files

Merge branch 'loads-as-call-instructions' into blockstructured-vectorization

# Conflicts:
#	python/dune/perftool/blockstructured/accumulation.py
#	python/dune/perftool/blockstructured/argument.py
#	python/dune/perftool/blockstructured/vectorization.py
parents 822ce8e3 20c0c232
No related branches found
No related tags found
No related merge requests found
from dune.perftool.generation import temporary_variable, instruction
from dune.perftool.options import get_option
from dune.perftool.pdelab.localoperator import determine_accumulation_space
from dune.perftool.pdelab.argument import name_accumulation_variable
from dune.perftool.pdelab.localoperator import boundary_predicates
import pymbolic.primitives as prim
def name_accumulation_alias(container, accumspace):
name = container+"_"+accumspace.lfs.name+"_alias"
k = get_option("number_of_blocks")
p = accumspace.element.degree()
temporary_variable(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1),
base_storage="dummy_"+container, managed=True, _base_storage_access_may_be_aliasing=True)
code = "{} = &{}.container()({},0);".format(container + "_"+accumspace.lfs.name+"_alias", container, accumspace.lfs.name)
instruction(within_inames=frozenset(),
code=code,
read_variables=frozenset({container}),
assignees=frozenset({name}))
return name
def generate_accumulation_instruction(expr, visitor):
# Collect the lfs and lfs indices for the accumulate call
test_lfs = determine_accumulation_space(visitor.test_info, 0)
# In the jacobian case, also determine the space for the ansatz space
ansatz_lfs = determine_accumulation_space(visitor.trial_info, 1)
# Collect the lfs and lfs indices for the accumulate call
accumvar = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
accumvar_alias = name_accumulation_alias(accumvar, test_lfs)
predicates = boundary_predicates(expr, visitor.measure, visitor.subdomain_id)
quad_inames = visitor.interface.quadrature_inames()
lfs_inames = visitor.test_info.inames
if visitor.trial_info:
lfs_inames = lfs_inames + visitor.trial_info.inames
assignee = prim.Subscript(prim.Variable(accumvar_alias), tuple(prim.Variable(i) for i in lfs_inames))
instruction(assignee=assignee,
expression=prim.Sum((expr, assignee)),
forced_iname_deps=frozenset(lfs_inames).union(frozenset(quad_inames)),
forced_iname_deps_is_final=True,
predicates=predicates
)
...@@ -9,6 +9,20 @@ import pymbolic.primitives as prim ...@@ -9,6 +9,20 @@ import pymbolic.primitives as prim
import loopy as lp import loopy as lp
def name_alias(container, lfs, element):
name = container+"_"+lfs.name+"_alias"
k = get_option("number_of_blocks")
p = element.degree()
temporary_variable(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1),
base_storage="dummy_"+container, managed=True, _base_storage_access_may_be_aliasing=True)
code = "{} = &{}({},0);".format(name,container, lfs.name)
instruction(within_inames=frozenset(),
code=code,
read_variables=frozenset({container}),
assignees=frozenset({name}))
return name
# TODO remove the need for element # TODO remove the need for element
@kernel_cached @kernel_cached
@backend(interface="pymbolic_coefficient", name="blockstructured") @backend(interface="pymbolic_coefficient", name="blockstructured")
...@@ -22,4 +36,9 @@ def pymbolic_coefficient(container, lfs, element, index): ...@@ -22,4 +36,9 @@ def pymbolic_coefficient(container, lfs, element, index):
lfs = prim.Variable(lfs) lfs = prim.Variable(lfs)
# use higher order FEM index instead of Q1 index # use higher order FEM index instead of Q1 index
return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),)) if get_option("vectorization_blockstructured"):
subelem_inames = sub_element_inames()
coeff_alias = name_alias(container, lfs, element)
return prim.Subscript(lp.TaggedVariable(coeff_alias, 'coeff_alias'), tuple(prim.Variable(i) for i in subelem_inames+index))
else:
return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),))
...@@ -17,32 +17,28 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -17,32 +17,28 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
from loopy.symbolic import WalkMapper from loopy.symbolic import WalkMapper
class FindAlias(WalkMapper): class FindAlias(WalkMapper):
def __init__(self, id, expr): def __init__(self, id, expr, vecs, insn_to_vec):
self.id = id self.id = id
self.vecs = vecs
self.insn_to_vec = insn_to_vec
self.rec(expr) self.rec(expr)
def post_visit(self,expr): def post_visit(self,expr):
if isinstance(expr, prim.Call): if isinstance(expr, prim.Subscript) \
func = expr.function and isinstance(expr.aggregate, prim.Variable) \
param = expr.parameters and expr.aggregate.name.endswith('alias'):
if (isinstance(func, CoefficientAccess) or isinstance(func, PDELabAccumulationFunction)) \ vec_name = expr.aggregate.name.replace('alias', 'vec')
and iname_inner in get_pymbolic_indices(prim.Subscript((), (param[1],))): self.vecs.add(vec_name)
lfs = param[0].name self.insn_to_vec.setdefault(self.id, set())
vec_name = (func.container if isinstance(func, CoefficientAccess) else func.accumobj) + "_" + lfs + "_vec" self.insn_to_vec[self.id].add((vec_name,expr))
read_vecs.add(vec_name)
read_insn_to_vec_instance.setdefault(self.id, set())
read_insn_to_vec_instance[self.id].add((vec_name, expr))
if isinstance(func, PDELabAccumulationFunction):
write_vecs.add(vec_name)
write_insn_to_vec_instance[self.id] = (vec_name, expr)
return return
# find all instances where an alias is read from or written to # find all instances where an alias is read from or written to
# save the name of the corresponding vector and the alias subscript expression # save the name of the corresponding vector and the alias subscript expression
for insn in knl.instructions: for insn in knl.instructions:
if isinstance(insn, lp.MultiAssignmentBase): if isinstance(insn, lp.MultiAssignmentBase):
FindAlias(insn.id, insn.expression) FindAlias(insn.id, insn.expression, read_vecs, read_insn_to_vec_instance)
if isinstance(insn, lp.Assignment):
FindAlias(insn.id, insn.assignee, write_vecs, write_insn_to_vec_instance)
read_insns = [knl.id_to_insn[id] for id in read_insn_to_vec_instance] read_insns = [knl.id_to_insn[id] for id in read_insn_to_vec_instance]
write_insns = [knl.id_to_insn[id] for id in write_insn_to_vec_instance] write_insns = [knl.id_to_insn[id] for id in write_insn_to_vec_instance]
...@@ -53,16 +49,16 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -53,16 +49,16 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
new_vec_temporaries[vec] = DuneTemporaryVariable(vec, dtype=np.float64, shape=(4,), managed=True, new_vec_temporaries[vec] = DuneTemporaryVariable(vec, dtype=np.float64, shape=(4,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
# write accum expr as "expr + r"
modified_accum_insn = [] modified_accum_insn = []
replace_accum_insn = dict() replace_accum_insn = dict()
vng = knl.get_var_name_generator() vng = knl.get_var_name_generator()
idg = knl.get_instruction_id_generator() idg = knl.get_instruction_id_generator()
for insn in write_insns: for insn in write_insns:
if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction): if isinstance(insn, lp.Assignment):
vec_name, expr = write_insn_to_vec_instance[insn.id] # write accum expr as "r = expr + r"
expr_accum = insn.expression.parameters[-1] expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee))
if expr_without_r == insn.expression:
continue
# finde micro inames # finde micro inames
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x"))) iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x")))
iname_iy = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_y"))) iname_iy = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_y")))
...@@ -80,7 +76,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -80,7 +76,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
# init a # init a
id_init_a = idg('insn_init_'+identifier_a) id_init_a = idg('insn_init_'+identifier_a)
modified_accum_insn.append(lp.Assignment(assignee=substitute(var_a, {iname_iy: prim.Variable(iname_iy+'_head')}), modified_accum_insn.append(lp.Assignment(assignee=substitute(var_a,{iname_iy:prim.Variable(iname_iy+'_head')}),
expression=0, expression=0,
id=id_init_a, id=id_init_a,
within_inames=(insn.within_inames-frozenset({iname_ix, iname_iy, within_inames=(insn.within_inames-frozenset({iname_ix, iname_iy,
...@@ -90,8 +86,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -90,8 +86,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
) )
# setze werte für a und b # setze werte für a und b
expr_b = substitute(expr_accum, {iname_ix: 1}) expr_b = substitute(expr_without_r, {iname_ix:1})
expr_a = prim.Sum((substitute(expr_accum, {iname_ix: 0}), var_a)) expr_a = prim.Sum((substitute(expr_without_r, {iname_ix:0}), var_a))
id_set_a = idg('insn_'+identifier_a) id_set_a = idg('insn_'+identifier_a)
id_set_b = idg('insn_'+identifier_b) id_set_b = idg('insn_'+identifier_b)
...@@ -112,13 +108,12 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -112,13 +108,12 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
# r+=a[iy] # r+=a[iy]
id_accum = idg('insn_mod_accum') id_accum = idg('insn_mod_accum')
r_vec = prim.Subscript(prim.Variable(vec_name),(prim.Variable(iname_inner),)) expr_accum = prim.Sum((var_a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (var_b,)),
expr_accum_mod = prim.Sum((var_a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (var_b,)), r_vec)) substitute(insn.assignee, {iname_ix:0})))
replace_accum_insn[insn.id] = lp.Assignment(assignee=r_vec, replace_accum_insn[insn.id] = lp.Assignment(assignee=substitute(insn.assignee,{iname_ix:0}),
expression=expr_accum_mod, expression=expr_accum,
id=id_accum, id=id_accum,
depends_on=insn.depends_on|frozenset({id_set_a, id_set_b, depends_on=insn.depends_on|frozenset({id_set_a,id_init_a,id_set_b}),
id_init_a}),
within_inames=insn.within_inames-frozenset({iname_ix}) within_inames=insn.within_inames-frozenset({iname_ix})
) )
# a[iy] = permute # a[iy] = permute
...@@ -133,21 +128,35 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -133,21 +128,35 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
) )
) )
# tail handling # tail handling | frozenset({iname_iy+'_tail'})))
id_accum_tail = idg('insn_accum_tail') id_accum_tail = idg('insn_accum_tail')
subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")/4, iname_iy: prim.Variable(iname_iy+'_tail'), subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")/4, iname_iy: prim.Variable(iname_iy+'_tail'),
iname_ix: 0} iname_ix: 0}
expr_tail = prim.Call(expr.function, tuple(substitute(p, subst_map) for p in expr.parameters[:-1]) assignee_tail = substitute(insn.assignee, subst_map)
+ (prim.Subscript(prim.Variable(identifier_a), (prim.Variable(iname_iy+'_tail'), 0)),)) expr_tail = prim.Sum((prim.Subscript(prim.Variable(identifier_a), (prim.Variable(iname_iy+'_tail'), 0)),
assignee_tail))
modified_accum_insn.append(lp.CallInstruction(assignees=(), modified_accum_insn.append(lp.Assignment(assignee=assignee_tail,
expression=expr_tail, expression=expr_tail,
id=id_accum_tail, id=id_accum_tail,
depends_on=frozenset({replace_accum_insn[insn.id].id, depends_on=frozenset({replace_accum_insn[insn.id].id,
id_permute, id_set_a, id_init_a}), id_permute, id_set_a, id_init_a}),
within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer, within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer,
iname_ix, iname_iy})) iname_ix, iname_iy}))
| frozenset({iname_iy+'_tail'}))) | frozenset({iname_iy+'_tail'})))
# update found instances with respect to modified accumulation instruction
def update_insn_to_vec_instance(insns, expr, insn_to_vec_instance):
new_insns = []
for insn in insns:
if insn.id in replace_accum_insn:
new_insns.append(replace_accum_insn[insn.id])
insn_to_vec_instance.pop(insn.id)
FindAlias(new_insns[-1].id, expr(new_insns[-1]), set(), insn_to_vec_instance)
else:
new_insns.append(insn)
return new_insns
read_insns = update_insn_to_vec_instance(read_insns, lambda i: i.expression, read_insn_to_vec_instance)
write_insns = update_insn_to_vec_instance(write_insns, lambda i: i.assignee, write_insn_to_vec_instance)
# add load instructions if the vector is read, based on the read instruction # add load instructions if the vector is read, based on the read instruction
# TODO brauche mehrere vectoren, falls in einer insn von einem alias mit unterschiedlichen idx gelesen wird # TODO brauche mehrere vectoren, falls in einer insn von einem alias mit unterschiedlichen idx gelesen wird
...@@ -156,79 +165,59 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -156,79 +165,59 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
read_dependencies = dict() read_dependencies = dict()
for insn in read_insns: for insn in read_insns:
for vec, expr in read_insn_to_vec_instance[insn.id]: for vec, expr in read_insn_to_vec_instance[insn.id]:
# different code for accumulation variable alias = vec.replace('vec', 'alias')
if isinstance(expr.function, PDELabAccumulationFunction):
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x"))) # flat index without vec iname
index = substitute(expr.parameters[1], {iname_inner:0, iname_ix: 0}) strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags)
code = "{}.load(&{}.container()({}, {}));".format(vec, expr.function.accumobj, expr.parameters[0], index) index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides)))
within_inames = insn.within_inames-frozenset({iname_ix})
else:
# flat index without vec iname
# add load instruction
index = substitute(expr.parameters[1], {iname_inner:0})
code = "{}.load(&{}({}, {}));".format(vec, expr.function.name, expr.parameters[0], index)
within_inames = insn.within_inames|insn.reduction_inames()
# load_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_load') # add load instruction
load_id = idg('insn_'+vec+'_load') load_id = idg('insn_'+vec+'_load')
load_insns.append(lp.CInstruction(iname_exprs=[], code=code, call_load = prim.Call(prim.Variable(vec+'.load'), (prim.Sum((prim.Variable(alias), index)),))
within_inames=within_inames, load_insns.append(lp.CallInstruction(assignees=(),expression=call_load,
#assignees=(lp.Variable(vec), ), # sonst denkt looy das müsste ein array von Vec4d sein... id=load_id,within_inames=insn.within_inames|insn.reduction_inames(),))
id=load_id))
read_dependencies.setdefault(insn.id, set()) read_dependencies.setdefault(insn.id, set())
read_dependencies[insn.id].add(load_id) read_dependencies[insn.id].add(load_id)
# add store instructions if the vector is written, based on the write instruction # add store instructions if the vector is written, based on the write instruction
store_insns = [] store_insns = []
for insn in write_insns: for insn in write_insns:
vec, expr = write_insn_to_vec_instance[insn.id] for vec, expr in write_insn_to_vec_instance[insn.id]:
alias = vec.replace('vec', 'alias')
# flat index without vec iname
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x"))) # flat index without vec iname
index = substitute(expr.parameters[1], {iname_inner:0, iname_ix: 0}) strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags)
index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner: 0, iname_ix: 0}).index_tuple, strides)))
# add store instruction
code = "{}.store(&{}.container()({}, {}));".format(vec, expr.function.accumobj, expr.parameters[0], index) # add store instruction
# store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store') store_id = idg('insn_'+vec+'_store')
store_id = idg('insn_'+vec+'_store') call_store = prim.Call(prim.Variable(vec+'.store'), (prim.Sum((prim.Variable(alias), index)),))
store_insns.append(lp.CInstruction(iname_exprs=[], code=code, store_insns.append(lp.CallInstruction(assignees=(),expression=call_store,
within_inames=insn.within_inames-frozenset({iname_ix}), id=store_id,within_inames=insn.within_inames,
depends_on=insn.depends_on depends_on=insn.depends_on
| frozenset({replace_accum_insn[insn.id].id}) | frozenset({insn.id})
| read_dependencies[insn.id], | read_dependencies[insn.id],
id=store_id)) ))
# exchange alias for vector
new_insns = [] new_insns = []
for insn in knl.instructions: for insn in knl.instructions:
insn = replace_accum_insn.get(insn.id, insn)
if insn.id not in read_insn_to_vec_instance.keys() | write_insn_to_vec_instance.keys(): if insn.id not in read_insn_to_vec_instance.keys() | write_insn_to_vec_instance.keys():
new_insns.append(insn) new_insns.append(insn)
else: else:
if insn.id in replace_accum_insn: subst_map = dict()
new_insn = replace_accum_insn[insn.id].copy(depends_on=replace_accum_insn[insn.id].depends_on for vec, expr in read_insn_to_vec_instance[insn.id]:
| read_dependencies[insn.id]) subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),))
else:
subst_map = dict() new_insn = insn
for vec, expr in read_insn_to_vec_instance[insn.id]:
subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),)) if insn in read_insns:
new_insn = new_insn.copy(expression=substitute(new_insn.expression, subst_map),
class NodeSubstitutor(SubstitutionMapper): depends_on=new_insn.depends_on | read_dependencies[insn.id])
def __init__(self): if insn in write_insns:
from pymbolic.mapper.substitutor import make_subst_func new_insn = new_insn.copy(assignee=substitute(new_insn.assignee, subst_map))
self.subst_func = make_subst_func(subst_map)
def map_call(self, expr, *args, **kwargs):
result = self.subst_func(expr)
if result is not None:
return result
else:
return SubstitutionMapper.map_call(self, expr, args, kwargs)
new_insn = insn
if insn in read_insns:
new_insn = new_insn.copy(expression=NodeSubstitutor()(new_insn.expression),
depends_on=new_insn.depends_on | read_dependencies[insn.id])
if insn in write_insns:
new_insn = new_insn.copy(assignee=NodeSubstitutor()(new_insn.assignee))
new_insns.append(new_insn) new_insns.append(new_insn)
from loopy.kernel.creation import resolve_dependencies from loopy.kernel.creation import resolve_dependencies
...@@ -239,7 +228,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -239,7 +228,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
def find_accumulation_inames(knl): def find_accumulation_inames(knl):
inames = set() inames = set()
for insn in knl.instructions: for insn in knl.instructions:
if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction): if any((n.startswith('r_') and n.endswith('alias') for n in insn.write_dependency_names())):
inames |= insn.within_inames inames |= insn.within_inames
inames = set((i for i in inames if i.startswith('micro') and not i.endswith('_x'))) inames = set((i for i in inames if i.startswith('micro') and not i.endswith('_x')))
......
...@@ -63,6 +63,14 @@ def type_floatingpoint(): ...@@ -63,6 +63,14 @@ def type_floatingpoint():
class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper): class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper):
def map_variable(self, expr, type_context):
tv = self.kernel.temporary_variables.get(expr.name)
if isinstance(tv, DuneTemporaryVariable) and tv.base_storage:
return prim.Variable(expr.name)
else:
return ExpressionToCExpressionMapper.map_variable(self, expr, type_context)
def map_subscript(self, expr, type_context): def map_subscript(self, expr, type_context):
arr = self.find_array(expr) arr = self.find_array(expr)
if isinstance(arr, (DuneTemporaryVariable, DuneGlobalArg)) and not arr.managed: if isinstance(arr, (DuneTemporaryVariable, DuneGlobalArg)) and not arr.managed:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment