From 3c94fc1989435b4acc4749597e5c582a36094c21 Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Tue, 30 Jan 2018 16:17:38 +0100 Subject: [PATCH] use GlobalArg for array alias resolves problem with non const pointer, but the modification of the DuneExpressionToCExpressionMapper is still there, maybe even worse now --- .../dune/perftool/blockstructured/accumulation.py | 10 ++++------ python/dune/perftool/blockstructured/argument.py | 10 ++++------ .../dune/perftool/blockstructured/vectorization.py | 14 ++++++-------- python/dune/perftool/loopy/target.py | 4 ++-- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/python/dune/perftool/blockstructured/accumulation.py b/python/dune/perftool/blockstructured/accumulation.py index a6024cf9..d9fe2a1f 100644 --- a/python/dune/perftool/blockstructured/accumulation.py +++ b/python/dune/perftool/blockstructured/accumulation.py @@ -1,10 +1,10 @@ -from dune.perftool.generation import temporary_variable, instruction +from dune.perftool.generation import instruction from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.options import get_option from dune.perftool.pdelab.localoperator import determine_accumulation_space from dune.perftool.pdelab.argument import name_accumulation_variable from dune.perftool.pdelab.localoperator import boundary_predicates -from dune.perftool.generation.loopy import function_mangler +from dune.perftool.generation.loopy import function_mangler, globalarg import loopy as lp import pymbolic.primitives as prim @@ -13,9 +13,8 @@ def name_accumulation_alias(container, accumspace): name = container+"_"+accumspace.lfs.name+"_alias" k = get_option("number_of_blocks") p = accumspace.element.degree() - temporary_variable(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), - base_storage="dummy_"+container, managed=True, _base_storage_access_may_be_aliasing=True) - code = "{} = &{}.container()({},0);".format(container + "_"+accumspace.lfs.name+"_alias", container, accumspace.lfs.name) + globalarg(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), managed=True) + code = "auto {} = &{}.container()({},0);".format(container + "_"+accumspace.lfs.name+"_alias", container, accumspace.lfs.name) instruction(within_inames=frozenset(), code=code, read_variables=frozenset({container}), @@ -26,7 +25,6 @@ def name_accumulation_alias(container, accumspace): @function_mangler def residual_weight_mangler(knl, func, arg_dtypes): if isinstance(func, str) and func.endswith('.weight'): - from pudb import set_trace; set_trace() return lp.CallMangleInfo(func, (lp.types.NumpyType(dtype_floatingpoint()),), ()) diff --git a/python/dune/perftool/blockstructured/argument.py b/python/dune/perftool/blockstructured/argument.py index 1d4fe319..fa03cba9 100644 --- a/python/dune/perftool/blockstructured/argument.py +++ b/python/dune/perftool/blockstructured/argument.py @@ -1,21 +1,19 @@ from dune.perftool.generation import (backend, kernel_cached, - valuearg, temporary_variable, instruction) + valuearg, instruction, globalarg) from dune.perftool.options import get_option from dune.perftool.pdelab.argument import CoefficientAccess from dune.perftool.blockstructured.tools import micro_index_to_macro_index, sub_element_inames from loopy.types import NumpyType import pymbolic.primitives as prim -import loopy as lp def name_alias(container, lfs, element): name = container+"_"+lfs.name+"_alias" k = get_option("number_of_blocks") p = element.degree() - temporary_variable(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), - base_storage="dummy_"+container, managed=True, _base_storage_access_may_be_aliasing=True) - code = "{} = &{}({},0);".format(name,container, lfs.name) + globalarg(name, shape=(k + 1, k + 1, p + 1, p + 1), strides=(p, p * k + 1, 1, k + 1), managed=True) + code = "const auto {} = &{}({},0);".format(name, container, lfs.name) instruction(within_inames=frozenset(), code=code, read_variables=frozenset({container}), @@ -39,6 +37,6 @@ def pymbolic_coefficient(container, lfs, element, index): if get_option("vectorization_blockstructured"): subelem_inames = sub_element_inames() coeff_alias = name_alias(container, lfs, element) - return prim.Subscript(lp.TaggedVariable(coeff_alias, 'coeff_alias'), tuple(prim.Variable(i) for i in subelem_inames+index)) + return prim.Subscript(prim.Variable(coeff_alias), tuple(prim.Variable(i) for i in subelem_inames+index)) else: return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),)) diff --git a/python/dune/perftool/blockstructured/vectorization.py b/python/dune/perftool/blockstructured/vectorization.py index 0e15869e..af64fab9 100644 --- a/python/dune/perftool/blockstructured/vectorization.py +++ b/python/dune/perftool/blockstructured/vectorization.py @@ -4,9 +4,6 @@ import pymbolic.primitives as prim from dune.perftool.loopy.temporary import DuneTemporaryVariable from dune.perftool.loopy.symbolic import substitute from dune.perftool.options import get_option -from dune.perftool.pdelab.argument import PDELabAccumulationFunction, CoefficientAccess -from dune.perftool.tools import get_pymbolic_indices -from loopy.symbolic import SubstitutionMapper def add_vcl_vector(knl, iname_inner, iname_outer): @@ -23,15 +20,16 @@ def add_vcl_vector(knl, iname_inner, iname_outer): self.insn_to_vec = insn_to_vec self.rec(expr) - def post_visit(self,expr): + def visit(self, expr, **kwargs): if isinstance(expr, prim.Subscript) \ - and isinstance(expr.aggregate, prim.Variable) \ and expr.aggregate.name.endswith('alias'): vec_name = expr.aggregate.name.replace('alias', 'vec') self.vecs.add(vec_name) self.insn_to_vec.setdefault(self.id, set()) self.insn_to_vec[self.id].add((vec_name,expr)) - return + return False + else: + return True # find all instances where an alias is read from or written to # save the name of the corresponding vector and the alias subscript expression for insn in knl.instructions: @@ -168,7 +166,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): alias = vec.replace('vec', 'alias') # flat index without vec iname - strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags) + strides = tuple(tag.stride for tag in knl.arg_dict[alias].dim_tags) index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides))) # add load instruction @@ -186,7 +184,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): alias = vec.replace('vec', 'alias') # flat index without vec iname - strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags) + strides = tuple(tag.stride for tag in knl.arg_dict[alias].dim_tags) index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner: 0, iname_ix: 0}).index_tuple, strides))) # add store instruction diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index ec293719..2c12f8a1 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -64,8 +64,8 @@ def type_floatingpoint(): class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper): def map_variable(self, expr, type_context): - tv = self.kernel.temporary_variables.get(expr.name) - if isinstance(tv, DuneTemporaryVariable) and tv.base_storage: + arg = self.kernel.arg_dict.get(expr.name) + if isinstance(arg, DuneGlobalArg) and expr.name.endswith('alias'): return prim.Variable(expr.name) else: return ExpressionToCExpressionMapper.map_variable(self, expr, type_context) -- GitLab