From 05c64326fb9e3d5c0b27e04ad49eb76358a55beb Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Thu, 31 Jan 2019 11:34:42 +0100 Subject: [PATCH] use fma for inverse 3d -> 9 fma + 18 mul (without -1 *) 2 fma more than C++, 4 mul/add more than C++ --- python/dune/codegen/pdelab/tensors.py | 52 +++++++++++---------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/python/dune/codegen/pdelab/tensors.py b/python/dune/codegen/pdelab/tensors.py index a1a52a4c..360f9a14 100644 --- a/python/dune/codegen/pdelab/tensors.py +++ b/python/dune/codegen/pdelab/tensors.py @@ -24,8 +24,7 @@ def define_determinant(name, matrix, shape, visitor): matrix_entry = [[prim.Subscript(prim.Variable(matrix), (i, j)) for j in range(dim)] for i in range(dim)] if dim == 2: - expr_determinant = FMA(matrix_entry[0][0], matrix_entry[1][1], - -1 * prim.Product((matrix_entry[1][0], matrix_entry[0][1]))) + expr_determinant = FMA(matrix_entry[0][0], matrix_entry[1][1], -1 * matrix_entry[1][0] * matrix_entry[0][1]) elif dim == 3: fma_A = FMA(matrix_entry[1][1], matrix_entry[2][2], -1 * matrix_entry[1][2] * matrix_entry[2][1]) @@ -73,35 +72,26 @@ def define_matrix_inverse(name, name_inv, shape, visitor): sign = 1. if i == j else -1. exprs[i][j] = prim.Product((sign, prim.Variable(det_inv), matrix_entry[1 - i][1 - j])) elif dim == 3: - exprs[0][0] = prim.Product((1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[1][1], matrix_entry[2][2])), - -1 * prim.Product((matrix_entry[1][2], matrix_entry[2][1])))))) - exprs[1][0] = prim.Product((-1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[0][1], matrix_entry[2][2])), - -1 * prim.Product((matrix_entry[0][2], matrix_entry[2][1])))))) - exprs[2][0] = prim.Product((1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[0][1], matrix_entry[1][2])), - -1 * prim.Product((matrix_entry[0][2], matrix_entry[1][1])))))) - - exprs[0][1] = prim.Product((-1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[1][0], matrix_entry[2][2])), - -1 * prim.Product((matrix_entry[1][2], matrix_entry[2][0])))))) - exprs[1][1] = prim.Product((1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[2][2])), - -1 * prim.Product((matrix_entry[0][2], matrix_entry[2][0])))))) - exprs[2][1] = prim.Product((-1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[1][2])), - -1 * prim.Product((matrix_entry[0][2], matrix_entry[1][0])))))) - - exprs[0][2] = prim.Product((1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[1][0], matrix_entry[2][1])), - -1 * prim.Product((matrix_entry[1][1], matrix_entry[2][0])))))) - exprs[1][2] = prim.Product((-1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[2][1])), - -1 * prim.Product((matrix_entry[0][1], matrix_entry[2][0])))))) - exprs[2][2] = prim.Product((1., prim.Variable(det_inv), - prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[1][1])), - -1 * prim.Product((matrix_entry[0][1], matrix_entry[1][0])))))) + exprs[0][0] = prim.Variable(det_inv) * FMA(matrix_entry[1][1], matrix_entry[2][2], + -1 * matrix_entry[1][2] * matrix_entry[2][1]) + exprs[1][0] = prim.Variable(det_inv) * FMA(matrix_entry[0][1], matrix_entry[2][2], + -1 * matrix_entry[0][2] * matrix_entry[2][1]) * -1 + exprs[2][0] = prim.Variable(det_inv) * FMA(matrix_entry[0][1], matrix_entry[1][2], + -1 * matrix_entry[0][2] * matrix_entry[1][1]) + + exprs[0][1] = prim.Variable(det_inv) * FMA(matrix_entry[1][0], matrix_entry[2][2], + -1 * matrix_entry[1][2] * matrix_entry[2][0]) * -1 + exprs[1][1] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[2][2], + -1 * matrix_entry[0][2] * matrix_entry[2][0]) + exprs[2][1] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[1][2], + -1 * matrix_entry[0][2] * matrix_entry[1][0]) * -1 + + exprs[0][2] = prim.Variable(det_inv) * FMA(matrix_entry[1][0], matrix_entry[2][1], + -1 * matrix_entry[1][1] * matrix_entry[2][0]) + exprs[1][2] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[2][1], + -1 * matrix_entry[0][1] * matrix_entry[2][0]) * -1 + exprs[2][2] = prim.Variable(det_inv) * FMA(matrix_entry[0][0], matrix_entry[1][1], + -1 * matrix_entry[0][1] * matrix_entry[1][0]) else: raise NotImplementedError for j in range(dim): -- GitLab