Skip to content
Snippets Groups Projects
Commit 57ad4513 authored by Marcel Koch's avatar Marcel Koch
Browse files

add option for tail ordering

'consecutive' means the tail is placed within the same subelement inames as
the vectorized loop (except for the vectorized iname);
'blocked' means each tail has its own set of subelement inames.
parent e78b0506
No related branches found
No related tags found
No related merge requests found
import loopy as lp import loopy as lp
import numpy as np import numpy as np
import pymbolic.primitives as prim import pymbolic.primitives as prim
from dune.codegen.blockstructured.tools import sub_element_inames
from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All, Not from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All, Not
from islpy import BasicSet from islpy import BasicSet
...@@ -468,13 +469,18 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz ...@@ -468,13 +469,18 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate)) temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate))
common_inames = knl.all_inames() common_inames = knl.all_inames()
for insn in insns_to_duplicate: for insn in new_insns:
common_inames = common_inames & (insn.within_inames | insn.reduction_inames()) common_inames = common_inames & (insn.within_inames | insn.reduction_inames())
if get_form_option('vectorization_blockstructured_tail_ordering') == 'blocked':
# TODO need to be more clever to get the right inames
macro_inames = frozenset((iname + '_0' * level) for iname in sub_element_inames())
common_inames = common_inames - macro_inames
additional_inames_to_duplicate = frozenset() additional_inames_to_duplicate = frozenset()
for insn in insns_to_duplicate: for insn in new_insns:
additional_inames_to_duplicate = additional_inames_to_duplicate | ((insn.within_inames | insn_inames = insn.within_inames | insn.reduction_inames()
insn.reduction_inames()) - common_inames) additional_inames_to_duplicate = additional_inames_to_duplicate | (insn_inames - common_inames)
knl = lp.duplicate_inames(knl, tuple(additional_inames_to_duplicate), knl = lp.duplicate_inames(knl, tuple(additional_inames_to_duplicate),
Or(tuple((Id(insn.id) for insn in new_insns)))) Or(tuple((Id(insn.id) for insn in new_insns))))
......
...@@ -101,7 +101,8 @@ class CodegenFormOptionsArray(ImmutableRecord): ...@@ -101,7 +101,8 @@ class CodegenFormOptionsArray(ImmutableRecord):
blockstructured = CodegenOption(default=False, helpstr="Use block structure") blockstructured = CodegenOption(default=False, helpstr="Use block structure")
number_of_blocks = CodegenOption(default=1, helpstr="Number of sub blocks in one direction") number_of_blocks = CodegenOption(default=1, helpstr="Number of sub blocks in one direction")
vectorization_blockstructured = CodegenOption(default=False, helpstr="Vectorize block structuring") vectorization_blockstructured = CodegenOption(default=False, helpstr="Vectorize block structuring")
vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'nunmber_of_blocks' is not divisible by vector length.") vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'nunmber_of_blocks' is not divisible by vector length")
vectorization_blockstructured_tail_ordering = CodegenOption(default='consecutive', helpstr="Ordering of the tail w.r.t the vectorized loop. Possible values: consecutive|blocked")
adjoint = CodegenOption(default=False, helpstr="Generate adjoint operator") adjoint = CodegenOption(default=False, helpstr="Generate adjoint operator")
control = CodegenOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable") control = CodegenOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable")
objective_function = CodegenOption(default=None, helpstr="Name of form representing the objective function in UFL file") objective_function = CodegenOption(default=None, helpstr="Name of form representing the objective function in UFL file")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment