diff --git a/python/dune/codegen/blockstructured/accumulation.py b/python/dune/codegen/blockstructured/accumulation.py index 9af9759d5e3426df12a10db39f94048e48006867..e649fe02308b8bfbc44ec46cc11083d240e48457 100644 --- a/python/dune/codegen/blockstructured/accumulation.py +++ b/python/dune/codegen/blockstructured/accumulation.py @@ -1,12 +1,12 @@ -from dune.codegen.blockstructured.tools import sub_element_inames -from dune.codegen.generation import accumulation_mixin, instruction +from dune.codegen.blockstructured.tools import sub_element_inames, name_accumulation_alias +from dune.codegen.generation import accumulation_mixin, instruction, get_global_context_value from dune.codegen.loopy.target import dtype_floatingpoint from dune.codegen.options import get_form_option from dune.codegen.pdelab.geometry import world_dimension, name_intersection_geometry_wrapper from dune.codegen.pdelab.localoperator import determine_accumulation_space, GenericAccumulationMixin from dune.codegen.pdelab.argument import name_accumulation_variable from dune.codegen.pdelab.localoperator import boundary_predicates -from dune.codegen.generation.loopy import function_mangler, globalarg, temporary_variable +from dune.codegen.generation.loopy import function_mangler, temporary_variable import loopy as lp import pymbolic.primitives as prim @@ -16,32 +16,10 @@ from loopy.match import Writes @accumulation_mixin("blockstructured") class BlockStructuredAccumulationMixin(GenericAccumulationMixin): def generate_accumulation_instruction(self, expr): - if get_form_option('vectorization_blockstructured'): - return generate_accumulation_instruction_vectorized(expr, self) - else: + if get_global_context_value("form_type") == "jacobians": return generate_accumulation_instruction(expr, self) - - -def name_accumulation_alias(container, accumspace): - name = container + "_" + accumspace.lfs.name + "_alias" - name_tail = container + "_" + accumspace.lfs.name + "_alias_tail" - k = get_form_option("number_of_blocks") - p = accumspace.element.degree() - - def _add_alias_insn(name): - dim = world_dimension() - element_stride = tuple(p * (p * k + 1)**i for i in range(0, dim)) - index_stride = tuple((p * k + 1)**i for i in range(0, dim)) - globalarg(name, shape=(k,) * dim + (p + 1,) * dim, strides=element_stride + index_stride, managed=True) - code = "auto {} = &{}.container()({},0);".format(name, container, accumspace.lfs.name) - instruction(within_inames=frozenset(), - code=code, - read_variables=frozenset({container}), - assignees=frozenset({name})) - - _add_alias_insn(name) - _add_alias_insn(name_tail) - return name + else: + return generate_accumulation_instruction_vectorized(expr, self) @function_mangler diff --git a/python/dune/codegen/blockstructured/argument.py b/python/dune/codegen/blockstructured/argument.py index 420773e85bea93ee55cb310255da7fa60d55d9de..deff1b8415d246d0ed197692df46b61db2dbd5cc 100644 --- a/python/dune/codegen/blockstructured/argument.py +++ b/python/dune/codegen/blockstructured/argument.py @@ -1,29 +1,11 @@ -from dune.codegen.generation import (kernel_cached, - valuearg, instruction, globalarg) +from dune.codegen.generation import kernel_cached, valuearg from dune.codegen.options import get_form_option from dune.codegen.pdelab.argument import CoefficientAccess -from dune.codegen.blockstructured.tools import micro_index_to_macro_index, sub_element_inames -from dune.codegen.pdelab.geometry import world_dimension +from dune.codegen.blockstructured.tools import micro_index_to_macro_index, sub_element_inames, name_container_alias from loopy.types import NumpyType import pymbolic.primitives as prim -def name_alias(container, lfs, element): - name = container + "_" + lfs.name + "_alias" - k = get_form_option("number_of_blocks") - p = element.degree() - dim = world_dimension() - element_stride = tuple(p * (p * k + 1)**i for i in range(0, dim)) - index_stride = tuple((p * k + 1)**i for i in range(0, dim)) - globalarg(name, shape=(k,) * dim + (p + 1,) * dim, strides=element_stride + index_stride, managed=True) - code = "const auto {} = &{}({},0);".format(name, container, lfs.name) - instruction(within_inames=frozenset(), - code=code, - read_variables=frozenset({container}), - assignees=frozenset({name})) - return name - - # TODO remove the need for element @kernel_cached def pymbolic_coefficient(container, lfs, element, index): @@ -36,9 +18,6 @@ def pymbolic_coefficient(container, lfs, element, index): lfs = prim.Variable(lfs) # use higher order FEM index instead of Q1 index - if get_form_option("vectorization_blockstructured"): - subelem_inames = sub_element_inames() - coeff_alias = name_alias(container, lfs, element) - return prim.Subscript(prim.Variable(coeff_alias), tuple(prim.Variable(i) for i in subelem_inames + index)) - else: - return prim.Call(CoefficientAccess(container), (lfs, micro_index_to_macro_index(element, index),)) + subelem_inames = sub_element_inames() + coeff_alias = name_container_alias(container, lfs, element) + return prim.Subscript(prim.Variable(coeff_alias), tuple(prim.Variable(i) for i in subelem_inames + index)) diff --git a/python/dune/codegen/blockstructured/tools.py b/python/dune/codegen/blockstructured/tools.py index 802b819f23754aa0862e122f4127a3db18593117..e9acf640cc3f6febad32dd9bec40cda7d75b881f 100644 --- a/python/dune/codegen/blockstructured/tools.py +++ b/python/dune/codegen/blockstructured/tools.py @@ -2,7 +2,7 @@ from dune.codegen.tools import get_pymbolic_basename from dune.codegen.generation import (iname, domain, temporary_variable, - instruction) + instruction, globalarg, preamble) from dune.codegen.pdelab.geometry import world_dimension from dune.codegen.options import get_form_option import pymbolic.primitives as prim @@ -74,3 +74,33 @@ def name_point_in_macro(point_in_micro, visitor): name = get_pymbolic_basename(point_in_micro) + "_macro" define_point_in_macro(name, point_in_micro, visitor) return name + + +@preamble +def define_container_alias(name, container, lfs, element, is_const): + k = get_form_option("number_of_blocks") + p = element.degree() + dim = world_dimension() + element_stride = tuple(p * (p * k + 1)**i for i in range(0, dim)) + index_stride = tuple((p * k + 1)**i for i in range(0, dim)) + globalarg(name, shape=(k,) * dim + (p + 1,) * dim, strides=element_stride + index_stride, managed=True) + if is_const: + return "const auto {} = &{}({},0);".format(name, container, lfs.name) + else: + return "auto {} = &{}.container()({},0);".format(name, container, lfs.name) + + +def name_accumulation_alias(container, accumspace): + name = container + "_" + accumspace.lfs.name + "_alias" + name_tail = container + "_" + accumspace.lfs.name + "_alias_tail" + + define_container_alias(name, container, accumspace.lfs, accumspace.element, is_const=False) + define_container_alias(name_tail, container, accumspace.lfs, accumspace.element, is_const=False) + return name + + +def name_container_alias(container, lfs, element): + name = container + "_" + lfs.name + "_alias" + + define_container_alias(name, container, lfs, element, is_const=True) + return name diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index d5224e6ffdd19ae88ccbdc6a798a8b416a6fd36d..94bc4875f262c72d695007be04c0b4107208f2f5 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -26,20 +26,22 @@ def add_vcl_temporaries(knl, vcl_size): init_iname = 'init_vec{}'.format(vcl_size) from islpy import BasicSet init_domain = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(init_iname, get_vcl_type_size(dtype_floatingpoint()))) + + silenced_warnings = [] + for alias in vector_alias: vector_name = alias.replace('alias', 'vec{}'.format(vcl_size)) new_vec_temporaries[vector_name] = DuneTemporaryVariable(vector_name, dtype=np.float64, shape=(vcl_size,), managed=True, scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) - # write once to the vector such that loopy won't complain - new_insns.append(lp.Assignment(assignee=prim.Subscript(prim.Variable(vector_name), prim.Variable(init_iname)), - expression=0, within_inames=frozenset({init_iname}), - id='init_{}'.format(vector_name))) + # silence warning such that loopy won't complain + silenced_warnings.append("read_no_write({})".format(vector_name)) from loopy.kernel.data import VectorizeTag return knl.copy(instructions=knl.instructions + new_insns, domains=knl.domains + [init_domain], temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries), - iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()})) + iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()}), + silenced_warnings=knl.silenced_warnings + silenced_warnings) def add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size, level): @@ -248,6 +250,7 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): load_insns.append(lp.CallInstruction(assignees=(), expression=call_load, id=load_id, within_inames=insn.within_inames | insn.reduction_inames(), depends_on=insn.depends_on | write_ids, + depends_on_is_final=True, tags=frozenset({'vectorized_{}'.format(level)}))) read_dependencies.setdefault(id, set()) read_dependencies[id].add(load_id) @@ -277,6 +280,7 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): id=store_id, within_inames=insn.within_inames, depends_on=(insn.depends_on | frozenset({id}) | read_dependencies[id] | write_ids), + depends_on_is_final=True, tags=frozenset({'vectorized_{}'.format(level)}))) # replace alias with vcl vector, except for accumulation assignee @@ -341,10 +345,12 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0): new_insns.append(insn.copy(assignee=assignee_vec, depends_on=(insn.depends_on | read_dependencies[insn.id] | write_ids), + depends_on_is_final=True, tags=insn.tags | frozenset({'vectorized_{}'.format(level)}))) else: new_insns.append(insn.copy(depends_on=(insn.depends_on | read_dependencies[insn.id] | write_ids), + depends_on_is_final=True, tags=insn.tags | frozenset({'vectorized_{}'.format(level)}))) return knl.copy(instructions=new_insns + load_insns + store_insns)