diff --git a/python/dune/codegen/pdelab/tensors.py b/python/dune/codegen/pdelab/tensors.py index fa9d52e6f4bd83aa8ca93f03b4167d5754a08a9e..a1a52a4c4012f404aaf036b5067facb6067f5b4a 100644 --- a/python/dune/codegen/pdelab/tensors.py +++ b/python/dune/codegen/pdelab/tensors.py @@ -7,7 +7,7 @@ from dune.codegen.generation import (get_counted_variable, instruction, temporary_variable, ) - +from dune.codegen.loopy.symbolic import FusedMultiplyAdd as FMA from loopy.match import Writes import pymbolic.primitives as prim @@ -24,16 +24,16 @@ def define_determinant(name, matrix, shape, visitor): matrix_entry = [[prim.Subscript(prim.Variable(matrix), (i, j)) for j in range(dim)] for i in range(dim)] if dim == 2: - expr_determinant = prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[1][1])), - -1 * prim.Product((matrix_entry[1][0], matrix_entry[0][1])))) + expr_determinant = FMA(matrix_entry[0][0], matrix_entry[1][1], + -1 * prim.Product((matrix_entry[1][0], matrix_entry[0][1]))) + elif dim == 3: - expr_determinant = prim.Sum((prim.Product((matrix_entry[0][0], matrix_entry[1][1], matrix_entry[2][2])), - prim.Product((matrix_entry[0][1], matrix_entry[1][2], matrix_entry[2][0])), - prim.Product((matrix_entry[0][2], matrix_entry[1][0], matrix_entry[2][1])), - -1 * prim.Product((matrix_entry[0][2], matrix_entry[1][1], matrix_entry[2][0])), - -1 * prim.Product((matrix_entry[0][0], matrix_entry[1][2], matrix_entry[2][1])), - -1 * prim.Product((matrix_entry[0][1], matrix_entry[1][0], matrix_entry[2][2])) - )) + fma_A = FMA(matrix_entry[1][1], matrix_entry[2][2], -1 * matrix_entry[1][2] * matrix_entry[2][1]) + fma_B = FMA(matrix_entry[1][0], matrix_entry[2][2], -1 * matrix_entry[1][2] * matrix_entry[2][0]) + fma_C = FMA(matrix_entry[1][0], matrix_entry[2][1], -1 * matrix_entry[1][1] * matrix_entry[2][0]) + + expr_determinant = FMA(matrix_entry[0][2], fma_C, + FMA(matrix_entry[0][0], fma_A, -1 * matrix_entry[0][1] * fma_B)) else: raise NotImplementedError() instruction(expression=expr_determinant,