From 323dcb6c950c0c2cffe0fd9a21a59bd4fca4cae3 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Mon, 24 Apr 2017 15:24:24 +0200
Subject: [PATCH] Unroll dimension loops in sumfact branch

---
 python/dune/perftool/options.py                |  5 +++++
 python/dune/perftool/sumfact/accumulation.py   | 12 +++++++++---
 python/dune/perftool/ufl/modified_terminals.py | 16 ++++++++++++----
 python/dune/perftool/ufl/preprocess.py         |  2 ++
 python/dune/perftool/ufl/visitor.py            | 12 ++++++++----
 5 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index 650626a5..7d7712d9 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -64,6 +64,7 @@ class PerftoolOptionsArray(ImmutableRecord):
 
     # Arguments that are mainly to be set by logic depending on other options
     max_vector_width = PerftoolOption(default=256, helpstr=None)
+    unroll_dimension_loops = PerftoolOption(default=False, helpstr="whether loops over the gemetric dimension should be unrolled.")
 
 
 # Until more sophisticated logic is needed, we keep the actual option data in this module
@@ -111,6 +112,10 @@ def update_options_from_inifile(opt):
 def process_options(opt):
     """ Make sure that the options have been fully processed """
     opt = expand_architecture_options(opt)
+
+    if opt.sumfact:
+        opt = opt.copy(unroll_dimension_loops=True)
+
     return opt
 
 
diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index f01d2bd3..ca1b52f5 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -40,6 +40,7 @@ from pytools import ImmutableRecord
 import loopy as lp
 import numpy as np
 import pymbolic.primitives as prim
+import ufl.classes as uc
 
 
 @iname
@@ -87,8 +88,6 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
     mod_arg_expr = accterm.argument.expr
     from ufl.classes import FunctionView, Argument
     while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)):
-        # If this has more than one operand we are potentially doing dangerous stuff
-        assert len(mod_arg_expr.ufl_operands) == 1
         mod_arg_expr = mod_arg_expr.ufl_operands[0]
     degree = mod_arg_expr.ufl_element()._degree
     basis_size = degree + 1
@@ -111,10 +110,17 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
         if accterm.is_jacobian:
             jacobian_inames = visitor.inames
 
+        # Determine the derivative direction
+        derivative = None
+        if accterm.new_indices:
+            derivative = indices[-1]
+        if isinstance(accterm.argument.expr, uc.Indexed):
+            derivative = accterm.argument.expr.ufl_operands[1][0]._value
+
         # Construct the matrix sequence for this sum factorization
         matrix_sequence = construct_basis_matrix_sequence(
             transpose=True,
-            derivative=indices[-1] if accterm.new_indices else None,
+            derivative=derivative,
             facedir=get_facedir(accterm.argument.restriction),
             facemod=get_facemod(accterm.argument.restriction),
             basis_size=basis_size)
diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py
index 50dd3315..3339eec7 100644
--- a/python/dune/perftool/ufl/modified_terminals.py
+++ b/python/dune/perftool/ufl/modified_terminals.py
@@ -4,6 +4,8 @@ from ufl.algorithms import MultiFunction
 from ufl.classes import MultiIndex
 from pytools import Record
 
+import ufl.classes as uc
+
 
 class Restriction:
     NONE = 0
@@ -92,8 +94,9 @@ class ModifiedTerminalTracker(MultiFunction):
 
 
 class ModifiedArgumentAnalysis(ModifiedTerminalTracker):
-    def __init__(self, do_index=False):
+    def __init__(self, do_index=False, do_fixed_index=False):
         self.do_index = do_index
+        self.do_fixed_index = do_fixed_index
         self.index = None
         ModifiedTerminalTracker.__init__(self)
 
@@ -102,7 +105,9 @@ class ModifiedArgumentAnalysis(ModifiedTerminalTracker):
         return self.call(o)
 
     def indexed(self, o):
-        if self.do_index:
+        if self.do_fixed_index and all(isinstance(i, uc.FixedIndex) for i in o.ufl_operands[1]):
+            self.index = o.ufl_operands[1]
+        if self.do_index and all(isinstance(i, uc.Index) for i in o.ufl_operands[1]):
             self.index = o.ufl_operands[1]
         return self.call(o.ufl_operands[0])
 
@@ -125,9 +130,10 @@ def analyse_modified_argument(expr, **kwargs):
 class _ModifiedArgumentExtractor(MultiFunction):
     """ A multifunction that extracts and returns the set of modified arguments """
 
-    def __call__(self, o, argnumber=None, coeffcount=None, do_index=False, do_gradient=True):
+    def __call__(self, o, argnumber=None, coeffcount=None, do_index=False, do_fixed_index=True, do_gradient=True):
         self.argnumber = argnumber
         self.coeffcount = coeffcount
+        self.do_fixed_index = do_fixed_index
         self.do_index = do_index
         self.do_gradient = do_gradient
         self.modified_arguments = set()
@@ -151,7 +157,9 @@ class _ModifiedArgumentExtractor(MultiFunction):
             return o
 
     def indexed(self, o):
-        if self.do_index:
+        if self.do_fixed_index and all(isinstance(i, uc.FixedIndex) for i in o.ufl_operands[1]):
+            return self.pass_on(o)
+        if self.do_index and all(isinstance(i, uc.Index) for i in o.ufl_operands[1]):
             return self.pass_on(o)
         else:
             self.expr(o)
diff --git a/python/dune/perftool/ufl/preprocess.py b/python/dune/perftool/ufl/preprocess.py
index 913bf5da..0a99d219 100644
--- a/python/dune/perftool/ufl/preprocess.py
+++ b/python/dune/perftool/ufl/preprocess.py
@@ -27,7 +27,9 @@ def apply_default_transformations(form):
     from dune.perftool.ufl.transformations import transform_form
     from dune.perftool.ufl.transformations.indexpushdown import pushdown_indexed
     from dune.perftool.ufl.transformations.reindexing import reindexing
+    from dune.perftool.ufl.transformations.unroll import unroll_dimension_loops
 
+    form = transform_form(form, unroll_dimension_loops)
     form = transform_form(form, pushdown_indexed)
     form = transform_form(form, reindexing)
 
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index 38b7eee6..280c1d88 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -381,13 +381,17 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
         if self.measure == 'exterior_facet':
             restriction = Restriction.NEGATIVE
 
-        # Dune only hast JacobianInverseTransposed as a first class citizen,
-        # so we need to switch the indices around!
         assert(len(self.indices) == 2)
-        indices = (self.indices[1], self.indices[0])
+        i, j = self.indices
         self.indices = None
 
-        return Subscript(Variable(self.interface.name_jacobian_inverse_transposed(restriction)), indices)
+        # Implement diagonal jacobians for unrolled matrices!
+        if isinstance(i, int) and isinstance(j, int) and i != j:
+            return 0
+
+        # Dune only has JacobianInverseTransposed as a first class citizen,
+        # so we need to switch the indices around!
+        return Subscript(Variable(self.interface.name_jacobian_inverse_transposed(restriction)), (j, i))
 
     def jacobian(self, o):
         raise NotImplementedError("How did you get Jacobian into your form? We only support JacobianInverse right now. Report!")
-- 
GitLab