From 7442aeb55cd9bbad196a81ae591ca541274d7c90 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Tue, 4 Oct 2016 17:20:46 +0200
Subject: [PATCH] More work for pullback

Various fixes, including diagonal jacobian transformations.
---
 python/dune/perftool/loopy/duplicate.py       | 21 ++++++++
 python/dune/perftool/pdelab/localoperator.py  | 30 +++++------
 .../ufl/transformations/axiparallel.py        | 52 +++++++++++++++++++
 test/stokes/stokes_stress.mini                |  6 ++-
 4 files changed, 90 insertions(+), 19 deletions(-)
 create mode 100644 python/dune/perftool/loopy/duplicate.py
 create mode 100644 python/dune/perftool/ufl/transformations/axiparallel.py

diff --git a/python/dune/perftool/loopy/duplicate.py b/python/dune/perftool/loopy/duplicate.py
new file mode 100644
index 00000000..7f88e2ff
--- /dev/null
+++ b/python/dune/perftool/loopy/duplicate.py
@@ -0,0 +1,21 @@
+""" Iname duplication strategies to make kernels schedulable """
+
+from loopy import (has_schedulable_iname_nesting,
+                   get_iname_duplication_options,
+                   duplicate_inames,
+                   )
+
+
+def heuristic_duplication(kernel):
+    # If the given kernel is schedulable, nothing needs to be done.
+    if has_schedulable_iname_nesting(kernel):
+        return kernel
+
+    # List all duplication options and return the transformed
+    # kernel if one such duplication transformation was enough to solve the problem.
+    for iname, within in get_iname_duplication_options(kernel):
+        dup_kernel = duplicate_inames(kernel, iname, within)
+        if has_schedulable_iname_nesting(dup_kernel):
+            return dup_kernel
+
+    raise NotImplementedError("Your kernel needs multiple iname duplications! No generic algorithm implemented for that yet! (#39)")
\ No newline at end of file
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 612eb77b..01de0906 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -244,6 +244,11 @@ def generate_kernel(integrals):
         subdomain_id = integral.subdomain_id()
         subdomain_data = integral.subdomain_data()
 
+        # Maybe make the jacobian inverse diagonal!
+        if get_option('diagonal_transformation_matrix'):
+            from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian
+            integrand = diagonal_jacobian(integrand)
+
         from dune.perftool.ufl.dimensionindex import dimension_index_mapping
         dimension_indices = dimension_index_mapping(integrand)
 
@@ -285,23 +290,14 @@ def generate_kernel(integrals):
 
     kernel = preprocess_kernel(kernel)
 
-    # see whether we need iname duplication to make this thing schedulable!
-    # TODO: Use a clever strategy here, instead of random transformation until the problem is resolved
-    from loopy import has_schedulable_iname_nesting, get_iname_duplication_options, duplicate_inames
-    while not has_schedulable_iname_nesting(kernel):
-        # If there is a duplication that solves the problem with just one duplication, we pick that one
-        iname, within = (None, None)
-        for i, w in get_iname_duplication_options(kernel):
-            if has_schedulable_iname_nesting(duplicate_inames(kernel, i, w)):
-                iname, within = (i, w)
-
-        # Otherwise pick a random one.
-        if iname is None:
-            iname, within = next(get_iname_duplication_options(kernel))
-
-        # Do the transformation
-        print("Applying iname duplication to measure {}: iname {}; within {}".format(measure, iname, within))
-        kernel = duplicate_inames(kernel, iname, within)
+    from dune.perftool.loopy.duplicate import heuristic_duplication
+    kernel = heuristic_duplication(kernel)
+
+    # This is also silly, but 1D DG Schemes never need the geometry, so the quadrature
+    # statement actually introduces a preamble at a stage where preambles cannot be generated
+    # anymore. TODO think about how to avoid this!!!
+    from dune.perftool.pdelab.quadrature import quadrature_loop_statement
+    quadrature_loop_statement()
 
     # Loopy might have introduced some temporary variables during preprocessing. As I want to have my own
     # temporary declaration code right now, I call the declaration preamble manually.
diff --git a/python/dune/perftool/ufl/transformations/axiparallel.py b/python/dune/perftool/ufl/transformations/axiparallel.py
new file mode 100644
index 00000000..6cfbf2fe
--- /dev/null
+++ b/python/dune/perftool/ufl/transformations/axiparallel.py
@@ -0,0 +1,52 @@
+"""
+The Jacobian Inverse Transposed for axiparallel cube grids is diagonal.
+We enforce this through a post-processing transformation (instead of hacking it into UFL)
+"""
+
+from dune.perftool.ufl.transformations import ufl_transformation
+from ufl.algorithms import MultiFunction
+from ufl.classes import (Indexed,
+                         JacobianInverse,
+                         MultiIndex,
+                         Product,
+                         Restricted,
+                         )
+
+
+class DiagonalJIT(MultiFunction):
+    def __init__(self):
+        self.replace = {}
+        MultiFunction.__init__(self)
+
+    def expr(self, o):
+        return self.reuse_if_untouched(o, *tuple(self(op) for op in o.ufl_operands))
+
+    def multi_index(self, o):
+#         from pudb import set_trace; set_trace()
+        return MultiIndex(tuple(self.replace.get(i, i) for i in o))
+
+    def index_sum(self, o):
+        e, i = o.ufl_operands
+        if isinstance(e, Product):
+            p1, p2 = e.ufl_operands
+            if isinstance(p1, Indexed):
+                p, ii = p1.ufl_operands
+                if isinstance(p, Restricted):
+                    p = p.ufl_operands[0]
+                if isinstance(p, JacobianInverse):
+                    assert(i[0] == ii[0])
+                    self.replace[ii[0]] = ii[1]
+                    ret = self(e)
+                    self.replace = {}
+                    return ret
+
+        return self.reuse_if_untouched(o, self(e), i)
+
+
+@ufl_transformation(name='axiparallel')
+def diagonal_jacobian(e):
+    """
+    This transformations can generically be described as:
+    \sum_k J^{(+/-)}_{k,i}x_k ==> J^{(+/-)}_{i,i}x_i
+    """
+    return DiagonalJIT()(e)
diff --git a/test/stokes/stokes_stress.mini b/test/stokes/stokes_stress.mini
index 4481e91d..09e874f1 100644
--- a/test/stokes/stokes_stress.mini
+++ b/test/stokes/stokes_stress.mini
@@ -1,5 +1,6 @@
 __name = stokes_stress_{__exec_suffix}
-__exec_suffix = symdiff, numdiff | expand num
+#__exec_suffix = symdiff, numdiff | expand num
+__exec_suffix = numdiff
 
 lowerleft = 0.0 0.0
 upperright = 1.0 1.0
@@ -13,6 +14,7 @@ reference = hagenpoiseuille_ref
 extension = vtu
 
 [formcompiler]
-numerical_jacobian = 0, 1 | expand num
+#numerical_jacobian = 0, 1 | expand num
+numerical_jacobian = 1
 exact_solution_expression = g
 compare_l2errorsquared = 1e-6
-- 
GitLab