Skip to content
Snippets Groups Projects
Commit 3c94fc19 authored by Marcel Koch
Browse files

use GlobalArg for array alias

Resolves the problem with the non-const pointer, but the modification of the
DuneExpressionToCExpressionMapper is still there, and is maybe even worse now.
parent fb1ce62e
No related branches found
No related tags found
No related merge requests found
from dune.perftool.generation import temporary_variable, instruction from dune.perftool.generation import instruction
from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.loopy.target import dtype_floatingpoint
from dune.perftool.options import get_option from dune.perftool.options import get_option
from dune.perftool.pdelab.localoperator import determine_accumulation_space from dune.perftool.pdelab.localoperator import determine_accumulation_space
from dune.perftool.pdelab.argument import name_accumulation_variable from dune.perftool.pdelab.argument import name_accumulation_variable
from dune.perftool.pdelab.localoperator import boundary_predicates from dune.perftool.pdelab.localoperator import boundary_predicates
from dune.perftool.generation.loopy import function_mangler from dune.perftool.generation.loopy import function_mangler, globalarg
import loopy as lp import loopy as lp
import pymbolic.primitives as prim import pymbolic.primitives as prim
...@@ -13,9 +13,8 @@ def name_accumulation_alias(container, accumspace): ...@@ -13,9 +13,8 @@ def name_accumulation_alias(container, accumspace):
name = container+"_"+accumspace.lfs.name+"_alias" name = container+"_"+accumspace.lfs.name+"_alias"
k = get_option("number_of_blocks") k = get_option("number_of_blocks")
p = accumspace.element.degree() p = accumspace.element.degree()
temporary_variable(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), globalarg(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), managed=True)
base_storage="dummy_"+container, managed=True, _base_storage_access_may_be_aliasing=True) code = "auto {} = &{}.container()({},0);".format(container + "_"+accumspace.lfs.name+"_alias", container, accumspace.lfs.name)
code = "{} = &{}.container()({},0);".format(container + "_"+accumspace.lfs.name+"_alias", container, accumspace.lfs.name)
instruction(within_inames=frozenset(), instruction(within_inames=frozenset(),
code=code, code=code,
read_variables=frozenset({container}), read_variables=frozenset({container}),
...@@ -26,7 +25,6 @@ def name_accumulation_alias(container, accumspace): ...@@ -26,7 +25,6 @@ def name_accumulation_alias(container, accumspace):
@function_mangler @function_mangler
def residual_weight_mangler(knl, func, arg_dtypes): def residual_weight_mangler(knl, func, arg_dtypes):
if isinstance(func, str) and func.endswith('.weight'): if isinstance(func, str) and func.endswith('.weight'):
from pudb import set_trace; set_trace()
return lp.CallMangleInfo(func, (lp.types.NumpyType(dtype_floatingpoint()),), ()) return lp.CallMangleInfo(func, (lp.types.NumpyType(dtype_floatingpoint()),), ())
......
from dune.perftool.generation import (backend, from dune.perftool.generation import (backend,
kernel_cached, kernel_cached,
valuearg, temporary_variable, instruction) valuearg, instruction, globalarg)
from dune.perftool.options import get_option from dune.perftool.options import get_option
from dune.perftool.pdelab.argument import CoefficientAccess from dune.perftool.pdelab.argument import CoefficientAccess
from dune.perftool.blockstructured.tools import micro_index_to_macro_index, sub_element_inames from dune.perftool.blockstructured.tools import micro_index_to_macro_index, sub_element_inames
from loopy.types import NumpyType from loopy.types import NumpyType
import pymbolic.primitives as prim import pymbolic.primitives as prim
import loopy as lp
def name_alias(container, lfs, element): def name_alias(container, lfs, element):
name = container+"_"+lfs.name+"_alias" name = container+"_"+lfs.name+"_alias"
k = get_option("number_of_blocks") k = get_option("number_of_blocks")
p = element.degree() p = element.degree()
temporary_variable(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), globalarg(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), managed=True)
base_storage="dummy_"+container, managed=True, _base_storage_access_may_be_aliasing=True) code = "const auto {} = &{}({},0);".format(name, container, lfs.name)
code = "{} = &{}({},0);".format(name,container, lfs.name)
instruction(within_inames=frozenset(), instruction(within_inames=frozenset(),
code=code, code=code,
read_variables=frozenset({container}), read_variables=frozenset({container}),
...@@ -39,6 +37,6 @@ def pymbolic_coefficient(container, lfs, element, index): ...@@ -39,6 +37,6 @@ def pymbolic_coefficient(container, lfs, element, index):
if get_option("vectorization_blockstructured"): if get_option("vectorization_blockstructured"):
subelem_inames = sub_element_inames() subelem_inames = sub_element_inames()
coeff_alias = name_alias(container, lfs, element) coeff_alias = name_alias(container, lfs, element)
return prim.Subscript(lp.TaggedVariable(coeff_alias, 'coeff_alias'), tuple(prim.Variable(i) for i in subelem_inames+index)) return prim.Subscript(prim.Variable(coeff_alias), tuple(prim.Variable(i) for i in subelem_inames+index))
else: else:
return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),)) return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),))
...@@ -4,9 +4,6 @@ import pymbolic.primitives as prim ...@@ -4,9 +4,6 @@ import pymbolic.primitives as prim
from dune.perftool.loopy.temporary import DuneTemporaryVariable from dune.perftool.loopy.temporary import DuneTemporaryVariable
from dune.perftool.loopy.symbolic import substitute from dune.perftool.loopy.symbolic import substitute
from dune.perftool.options import get_option from dune.perftool.options import get_option
from dune.perftool.pdelab.argument import PDELabAccumulationFunction, CoefficientAccess
from dune.perftool.tools import get_pymbolic_indices
from loopy.symbolic import SubstitutionMapper
def add_vcl_vector(knl, iname_inner, iname_outer): def add_vcl_vector(knl, iname_inner, iname_outer):
...@@ -23,15 +20,16 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -23,15 +20,16 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
self.insn_to_vec = insn_to_vec self.insn_to_vec = insn_to_vec
self.rec(expr) self.rec(expr)
def post_visit(self,expr): def visit(self, expr, **kwargs):
if isinstance(expr, prim.Subscript) \ if isinstance(expr, prim.Subscript) \
and isinstance(expr.aggregate, prim.Variable) \
and expr.aggregate.name.endswith('alias'): and expr.aggregate.name.endswith('alias'):
vec_name = expr.aggregate.name.replace('alias', 'vec') vec_name = expr.aggregate.name.replace('alias', 'vec')
self.vecs.add(vec_name) self.vecs.add(vec_name)
self.insn_to_vec.setdefault(self.id, set()) self.insn_to_vec.setdefault(self.id, set())
self.insn_to_vec[self.id].add((vec_name,expr)) self.insn_to_vec[self.id].add((vec_name,expr))
return return False
else:
return True
# find all instances where an alias is read from or written to # find all instances where an alias is read from or written to
# save the name of the corresponding vector and the alias subscript expression # save the name of the corresponding vector and the alias subscript expression
for insn in knl.instructions: for insn in knl.instructions:
...@@ -168,7 +166,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -168,7 +166,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
alias = vec.replace('vec', 'alias') alias = vec.replace('vec', 'alias')
# flat index without vec iname # flat index without vec iname
strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags) strides = tuple(tag.stride for tag in knl.arg_dict[alias].dim_tags)
index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides))) index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides)))
# add load instruction # add load instruction
...@@ -186,7 +184,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): ...@@ -186,7 +184,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
alias = vec.replace('vec', 'alias') alias = vec.replace('vec', 'alias')
# flat index without vec iname # flat index without vec iname
strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags) strides = tuple(tag.stride for tag in knl.arg_dict[alias].dim_tags)
index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner: 0, iname_ix: 0}).index_tuple, strides))) index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner: 0, iname_ix: 0}).index_tuple, strides)))
# add store instruction # add store instruction
......
...@@ -64,8 +64,8 @@ def type_floatingpoint(): ...@@ -64,8 +64,8 @@ def type_floatingpoint():
class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper): class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper):
def map_variable(self, expr, type_context): def map_variable(self, expr, type_context):
tv = self.kernel.temporary_variables.get(expr.name) arg = self.kernel.arg_dict.get(expr.name)
if isinstance(tv, DuneTemporaryVariable) and tv.base_storage: if isinstance(arg, DuneGlobalArg) and expr.name.endswith('alias'):
return prim.Variable(expr.name) return prim.Variable(expr.name)
else: else:
return ExpressionToCExpressionMapper.map_variable(self, expr, type_context) return ExpressionToCExpressionMapper.map_variable(self, expr, type_context)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment