From 57ad4513d47467f1335d80e2c69aae5d31c07b9f Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Fri, 15 Feb 2019 16:03:44 +0100 Subject: [PATCH] add option for tail ordering consecutive means the tail is within the same subelement inames as the vectorized loop (except for the vectorized iname) blocked means each tail has its own set of subelement inames --- .../dune/codegen/blockstructured/vectorization.py | 14 ++++++++++---- python/dune/codegen/options.py | 3 ++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 22942d66..cfdfc794 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -1,6 +1,7 @@ import loopy as lp import numpy as np import pymbolic.primitives as prim +from dune.codegen.blockstructured.tools import sub_element_inames from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All, Not from islpy import BasicSet @@ -468,13 +469,18 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate)) common_inames = knl.all_inames() - for insn in insns_to_duplicate: + for insn in new_insns: common_inames = common_inames & (insn.within_inames | insn.reduction_inames()) + if get_form_option('vectorization_blockstructured_tail_ordering') == 'blocked': + # TODO need to be more clever to get the right inames + macro_inames = frozenset((iname + '_0' * level) for iname in sub_element_inames()) + common_inames = common_inames - macro_inames + additional_inames_to_duplicate = frozenset() - for insn in insns_to_duplicate: - additional_inames_to_duplicate = additional_inames_to_duplicate | ((insn.within_inames | - insn.reduction_inames()) - common_inames) + for insn in new_insns: + insn_inames = insn.within_inames | 
insn.reduction_inames() + additional_inames_to_duplicate = additional_inames_to_duplicate | (insn_inames - common_inames) knl = lp.duplicate_inames(knl, tuple(additional_inames_to_duplicate), Or(tuple((Id(insn.id) for insn in new_insns)))) diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py index bf80a72a..00fc2e48 100644 --- a/python/dune/codegen/options.py +++ b/python/dune/codegen/options.py @@ -101,7 +101,8 @@ class CodegenFormOptionsArray(ImmutableRecord): blockstructured = CodegenOption(default=False, helpstr="Use block structure") number_of_blocks = CodegenOption(default=1, helpstr="Number of sub blocks in one direction") vectorization_blockstructured = CodegenOption(default=False, helpstr="Vectorize block structuring") - vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'nunmber_of_blocks' is not divisible by vector length.") + vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'nunmber_of_blocks' is not divisible by vector length") + vectorization_blockstructured_tail_ordering = CodegenOption(default='consecutive', helpstr="Ordering of the tail w.r.t the vectorized loop. Possible values: consecutive|blocked") adjoint = CodegenOption(default=False, helpstr="Generate adjoint operator") control = CodegenOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable") objective_function = CodegenOption(default=None, helpstr="Name of form representing the objective function in UFL file") -- GitLab