Unroll dimension loops in sumfact branch

323dcb6c · Dominic Kempf · 93b61c97 · 323dcb6c · 323dcb6c · 323dcb6c
Commit 323dcb6c authored 7 years ago by Dominic Kempf
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -64,6 +64,7 @@ class PerftoolOptionsArray(ImmutableRecord):

    # Arguments that are mainly to be set by logic depending on other options
    max_vector_width = PerftoolOption(default=256, helpstr=None)
+    unroll_dimension_loops = PerftoolOption(default=False, helpstr="whether loops over the gemetric dimension should be unrolled.")


 # Until more sophisticated logic is needed, we keep the actual option data in this module
@@ -111,6 +112,10 @@ def update_options_from_inifile(opt):
 def process_options(opt):
    """ Make sure that the options have been fully processed """
    opt = expand_architecture_options(opt)
+
+    if opt.sumfact:
+        opt = opt.copy(unroll_dimension_loops=True)
+
    return opt



--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -40,6 +40,7 @@ from pytools import ImmutableRecord
 import loopy as lp
 import numpy as np
 import pymbolic.primitives as prim
+import ufl.classes as uc


 @iname
@@ -87,8 +88,6 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
    mod_arg_expr = accterm.argument.expr
    from ufl.classes import FunctionView, Argument
    while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)):
-        # If this has more than one operand we are potentially doing dangerous stuff
-        assert len(mod_arg_expr.ufl_operands) == 1
        mod_arg_expr = mod_arg_expr.ufl_operands[0]
    degree = mod_arg_expr.ufl_element()._degree
    basis_size = degree + 1
@@ -111,10 +110,17 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
        if accterm.is_jacobian:
            jacobian_inames = visitor.inames

+        # Determine the derivative direction
+        derivative = None
+        if accterm.new_indices:
+            derivative = indices[-1]
+        if isinstance(accterm.argument.expr, uc.Indexed):
+            derivative = accterm.argument.expr.ufl_operands[1][0]._value
+
        # Construct the matrix sequence for this sum factorization
        matrix_sequence = construct_basis_matrix_sequence(
            transpose=True,
-            derivative=indices[-1] if accterm.new_indices else None,
+            derivative=derivative,
            facedir=get_facedir(accterm.argument.restriction),
            facemod=get_facemod(accterm.argument.restriction),
            basis_size=basis_size)

--- a/python/dune/perftool/ufl/modified_terminals.py
+++ b/python/dune/perftool/ufl/modified_terminals.py
@@ -4,6 +4,8 @@ from ufl.algorithms import MultiFunction
 from ufl.classes import MultiIndex
 from pytools import Record

+import ufl.classes as uc
+

 class Restriction:
    NONE = 0
@@ -92,8 +94,9 @@ class ModifiedTerminalTracker(MultiFunction):


 class ModifiedArgumentAnalysis(ModifiedTerminalTracker):
-    def __init__(self, do_index=False):
+    def __init__(self, do_index=False, do_fixed_index=False):
        self.do_index = do_index
+        self.do_fixed_index = do_fixed_index
        self.index = None
        ModifiedTerminalTracker.__init__(self)

@@ -102,7 +105,9 @@ class ModifiedArgumentAnalysis(ModifiedTerminalTracker):
        return self.call(o)

    def indexed(self, o):
-        if self.do_index:
+        if self.do_fixed_index and all(isinstance(i, uc.FixedIndex) for i in o.ufl_operands[1]):
+            self.index = o.ufl_operands[1]
+        if self.do_index and all(isinstance(i, uc.Index) for i in o.ufl_operands[1]):
            self.index = o.ufl_operands[1]
        return self.call(o.ufl_operands[0])

@@ -125,9 +130,10 @@ def analyse_modified_argument(expr, **kwargs):
 class _ModifiedArgumentExtractor(MultiFunction):
    """ A multifunction that extracts and returns the set of modified arguments """

-    def __call__(self, o, argnumber=None, coeffcount=None, do_index=False, do_gradient=True):
+    def __call__(self, o, argnumber=None, coeffcount=None, do_index=False, do_fixed_index=True, do_gradient=True):
        self.argnumber = argnumber
        self.coeffcount = coeffcount
+        self.do_fixed_index = do_fixed_index
        self.do_index = do_index
        self.do_gradient = do_gradient
        self.modified_arguments = set()
@@ -151,7 +157,9 @@ class _ModifiedArgumentExtractor(MultiFunction):
            return o

    def indexed(self, o):
-        if self.do_index:
+        if self.do_fixed_index and all(isinstance(i, uc.FixedIndex) for i in o.ufl_operands[1]):
+            return self.pass_on(o)
+        if self.do_index and all(isinstance(i, uc.Index) for i in o.ufl_operands[1]):
            return self.pass_on(o)
        else:
            self.expr(o)

--- a/python/dune/perftool/ufl/preprocess.py
+++ b/python/dune/perftool/ufl/preprocess.py
@@ -27,7 +27,9 @@ def apply_default_transformations(form):
    from dune.perftool.ufl.transformations import transform_form
    from dune.perftool.ufl.transformations.indexpushdown import pushdown_indexed
    from dune.perftool.ufl.transformations.reindexing import reindexing
+    from dune.perftool.ufl.transformations.unroll import unroll_dimension_loops

+    form = transform_form(form, unroll_dimension_loops)
    form = transform_form(form, pushdown_indexed)
    form = transform_form(form, reindexing)


--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -381,13 +381,17 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
        if self.measure == 'exterior_facet':
            restriction = Restriction.NEGATIVE

-        # Dune only hast JacobianInverseTransposed as a first class citizen,
-        # so we need to switch the indices around!
        assert(len(self.indices) == 2)
-        indices = (self.indices[1], self.indices[0])
+        i, j = self.indices
        self.indices = None

-        return Subscript(Variable(self.interface.name_jacobian_inverse_transposed(restriction)), indices)
+        # Implement diagonal jacobians for unrolled matrices!
+        if isinstance(i, int) and isinstance(j, int) and i != j:
+            return 0
+
+        # Dune only has JacobianInverseTransposed as a first class citizen,
+        # so we need to switch the indices around!
+        return Subscript(Variable(self.interface.name_jacobian_inverse_transposed(restriction)), (j, i))

    def jacobian(self, o):
        raise NotImplementedError("How did you get Jacobian into your form? We only support JacobianInverse right now. Report!")