From 227304caad6f33374fe4c244e9a9587b290a4b7d Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Tue, 13 Dec 2016 13:53:31 +0100
Subject: [PATCH] Eliminate trivial reductions

For some reason, with this patch applied the kernel for sumfact_poisson_dg_symdiff can no longer be scheduled.
---
 python/dune/perftool/sumfact/sumfact.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index 539d2f69..4205aedb 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -308,7 +308,6 @@ def sum_factorization_kernel(a_matrices, buf, stage,
         # Get the inames needed for one matrix-matrix multiplication
         i = sumfact_iname(out_shape[0], "row")
         j = sumfact_iname(out_shape[1], "col")
-        k = sumfact_iname(a_matrix.cols, "red")
 
         # Maybe introduce a vectorization iname for this matrix-matrix multiplication
         vec_iname = ()
@@ -317,15 +316,27 @@ def sum_factorization_kernel(a_matrices, buf, stage,
             vec_iname = (prim.Variable(iname),)
             transform(lp.tag_inames, [(iname, "vec")])
 
-        # Construct the matrix-matrix-multiplication expression a_ik*in_kj
-        prod = Product((Subscript(Variable(a_matrix.name), (Variable(i), Variable(k)) + vec_iname),
-                        Subscript(Variable(inp), (Variable(k), Variable(j)) + vec_iname)
-                        ))
+        if a_matrix.cols == 1:
+            # A trivial reduction is implemented as a product, otherwise we run into
+            # a code generation corner case producing way too complicated code. This
+            # could be fixed upstream, but the loopy code realizing reductions is not
+            # trivial and the priority is kind of low.
+            matprod = Product((Subscript(Variable(a_matrix.name), (Variable(i), 0) + vec_iname),
+                               Subscript(Variable(inp), (0, Variable(j)) + vec_iname)
+                               ))
+        else:
+            k = sumfact_iname(a_matrix.cols, "red")
+
+            # Construct the matrix-matrix-multiplication expression a_ik*in_kj
+            prod = Product((Subscript(Variable(a_matrix.name), (Variable(i), Variable(k)) + vec_iname),
+                            Subscript(Variable(inp), (Variable(k), Variable(j)) + vec_iname)
+                            ))
+            matprod = Reduction("sum", k, prod)
 
         # Issue the reduction instruction that implements the multiplication
         # at the same time store the instruction ID for the next instruction to depend on
         insn_dep = frozenset({instruction(assignee=Subscript(Variable(out), (Variable(i), Variable(j)) + vec_iname),
-                                          expression=Reduction("sum", k, prod),
+                                          expression=matprod,
                                           forced_iname_deps=frozenset({i, j}).union(additional_inames),
                                           forced_iname_deps_is_final=True,
                                           depends_on=insn_dep,
-- 
GitLab