From 05c64326fb9e3d5c0b27e04ad49eb76358a55beb Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Thu, 31 Jan 2019 11:34:42 +0100
Subject: [PATCH] use fma for inverse

For the 3D inverse this yields 9 FMA + 18 mul (not counting the multiplications by -1).
That is 2 FMA and 4 mul/add more than the C++ version.
---
 python/dune/codegen/pdelab/tensors.py | 52 +++++++++++----------------
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/python/dune/codegen/pdelab/tensors.py b/python/dune/codegen/pdelab/tensors.py
index a1a52a4c..360f9a14 100644
--- a/python/dune/codegen/pdelab/tensors.py
+++ b/python/dune/codegen/pdelab/tensors.py
@@ -24,8 +24,7 @@ def define_determinant(name, matrix, shape, visitor):
 
     matrix_entry = [[prim.Subscript(prim.Variable(matrix), (i, j)) for j in range(dim)] for i in range(dim)]
     if dim == 2:
-        expr_determinant = FMA(matrix_entry[0][0], matrix_entry[1][1],
-                               -1 * prim.Product((matrix_entry[1][0], matrix_entry[0][1])))
+        expr_determinant = FMA(matrix_entry[0][0], matrix_entry[1][1], -1 * matrix_entry[1][0] * matrix_entry[0][1])
 
     elif dim == 3:
         fma_A = FMA(matrix_entry[1][1], matrix_entry[2][2], -1 * matrix_entry[1][2] * matrix_entry[2][1])
@@ -73,35 +72,26 @@ def define_matrix_inverse(name, name_inv, shape, visitor):
                 sign = 1. if i == j else -1.
                 exprs[i][j] = prim.Product((sign, prim.Variable(det_inv), matrix_entry[1 - i][1 - j]))
     elif dim == 3:
-        exprs[0][0] = prim.Product((1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[1][1], matrix_entry[2][2])),
-                                              -1 * prim.Product((matrix_entry[1][2], matrix_entry[2][1]))))))
-        exprs[1][0] = prim.Product((-1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[0][1], matrix_entry[2][2])),
-                                              -1 * prim.Product((matrix_entry[0][2], matrix_entry[2][1]))))))
-        exprs[2][0] = prim.Product((1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[0][1], matrix_entry[1][2])),
-                                              -1 * prim.Product((matrix_entry[0][2], matrix_entry[1][1]))))))
-
-        exprs[0][1] = prim.Product((-1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[1][0], matrix_entry[2][2])),
-                                              -1 * prim.Product((matrix_entry[1][2], matrix_entry[2][0]))))))
-        exprs[1][1] = prim.Product((1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[2][2])),
-                                              -1 * prim.Product((matrix_entry[0][2], matrix_entry[2][0]))))))
-        exprs[2][1] = prim.Product((-1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[1][2])),
-                                              -1 * prim.Product((matrix_entry[0][2], matrix_entry[1][0]))))))
-
-        exprs[0][2] = prim.Product((1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[1][0], matrix_entry[2][1])),
-                                              -1 * prim.Product((matrix_entry[1][1], matrix_entry[2][0]))))))
-        exprs[1][2] = prim.Product((-1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[2][1])),
-                                              -1 * prim.Product((matrix_entry[0][1], matrix_entry[2][0]))))))
-        exprs[2][2] = prim.Product((1., prim.Variable(det_inv),
-                                    prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[1][1])),
-                                              -1 * prim.Product((matrix_entry[0][1], matrix_entry[1][0]))))))
+        exprs[0][0] = prim.Variable(det_inv) * FMA(matrix_entry[1][1], matrix_entry[2][2],
+                                                   -1 * matrix_entry[1][2] * matrix_entry[2][1])
+        exprs[1][0] = prim.Variable(det_inv) * FMA(matrix_entry[0][1], matrix_entry[2][2],
+                                                   -1 * matrix_entry[0][2] * matrix_entry[2][1]) * -1
+        exprs[2][0] = prim.Variable(det_inv) * FMA(matrix_entry[0][1], matrix_entry[1][2],
+                                                   -1 * matrix_entry[0][2] * matrix_entry[1][1])
+
+        exprs[0][1] = prim.Variable(det_inv) * FMA(matrix_entry[1][0], matrix_entry[2][2],
+                                                   -1 * matrix_entry[1][2] * matrix_entry[2][0]) * -1
+        exprs[1][1] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[2][2],
+                                                   -1 * matrix_entry[0][2] * matrix_entry[2][0])
+        exprs[2][1] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[1][2],
+                                                   -1 * matrix_entry[0][2] * matrix_entry[1][0]) * -1
+
+        exprs[0][2] = prim.Variable(det_inv) * FMA(matrix_entry[1][0], matrix_entry[2][1],
+                                                   -1 * matrix_entry[1][1] * matrix_entry[2][0])
+        exprs[1][2] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[2][1],
+                                                   -1 * matrix_entry[0][1] * matrix_entry[2][0]) * -1
+        exprs[2][2] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[1][1],
+                                                   -1 * matrix_entry[0][1] * matrix_entry[1][0])
     else:
         raise NotImplementedError
     for j in range(dim):
-- 
GitLab