Implement sumfactorized stokes numdiff

91e3b754 · René Heß · fc03aa10 · 91e3b754 · 91e3b754 · 91e3b754
Commit 91e3b754 authored 8 years ago by René Heß
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -210,7 +210,7 @@ class AccumulationSpace(Record):
            return (self.restriction,)


-def determine_accumulation_space(expr, number, measure):
+def determine_accumulation_space(expr, number, measure, idims=None):
    from dune.perftool.ufl.modified_terminals import extract_modified_arguments
    args = extract_modified_arguments(expr, argnumber=number)

@@ -236,7 +236,8 @@ def determine_accumulation_space(expr, number, measure):

    if len(subel.value_shape()) != 0:
        from dune.perftool.pdelab.geometry import dimension_iname
-        idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape())))
+        if idims is None:
+            idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape())))
        lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry())
        subel = subel.sub_elements()[0]

@@ -312,7 +313,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
    # When we do not do sumfactorization we do not split the test function
    assert(accterm.argument.expr is None)

-    # First we do the tree traversal to get a pymbolic expression representing this expression
+    # Do the tree traversal to get a pymbolic expression representing this expression
    pymbolic_expr = visitor(accterm.term)

    # It may happen that an entire accumulation term vanishes. We do nothing in that case

--- a/python/dune/perftool/pdelab/spaces.py
+++ b/python/dune/perftool/pdelab/spaces.py
@@ -79,7 +79,12 @@ def lfs_child(lfs, children, shape=None, symmetry=False):

            indices = (Sum((Product((n - 1, i)), Product((.5, i, 1 - i)), j)),)
        else:
-            children = tuple(Variable(c) for c in children)
+            # If the index is not an int we need to make a variable out of it.
+            #
+            # Note: ints occur in the sumfactorisation case with
+            # vector valued functions (eg. Stokes)
+            if not isinstance(children[0], int):
+                children = tuple(Variable(c) for c in children)
            indices = (Sum(tuple(Product((Product(tuple(shape[j] for j in range(i))), child)) for i, child in enumerate(children))),)

    return Call(LFSChild(lfs), indices)

--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -30,22 +30,22 @@ class SumFactInterface(PDELabInterface):
        return pymbolic_reference_gradient(element, restriction, number)

    def pymbolic_trialfunction_gradient(self, element, restriction, component, visitor=None):
-        ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_coefficientcontainer, visitor)
+        ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_coefficientcontainer, visitor.indices)
        visitor.indices = indices
        return ret

    def pymbolic_trialfunction(self, element, restriction, component, visitor=None):
-        ret, indices = pymbolic_coefficient(element, restriction, component, name_coefficientcontainer, visitor)
+        ret, indices = pymbolic_coefficient(element, restriction, component, name_coefficientcontainer, visitor.indices)
        visitor.indices = indices
        return ret

    def pymbolic_apply_function_gradient(self, element, restriction, component, visitor=None):
-        ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_applycontainer, visitor)
+        ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_applycontainer, visitor.indices)
        visitor.indices = indices
        return ret

    def pymbolic_apply_function(self, element, restriction, component, visitor=None):
-        ret, indices = pymbolic_coefficient(element, restriction, component, name_applycontainer, visitor)
+        ret, indices = pymbolic_coefficient(element, restriction, component, name_applycontainer, visitor.indices)
        visitor.indices = indices
        return ret


--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
+import itertools
+
 from dune.perftool.pdelab.argument import (name_accumulation_variable,
                                           PDELabAccumulationFunction,
                                           )
@@ -65,14 +67,17 @@ def name_test_function_contribution(test):

 @backend(interface="accum_insn", name="sumfact")
 def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
-    pymbolic_expr = visitor(accterm.term)
+    # When doing sumfactorization we want to split the test function
+    assert(accterm.argument.expr is not None)
+

+    # Do the tree traversal to get a pymbolic expression representing this expression
+    pymbolic_expr = visitor(accterm.term)
    if pymbolic_expr == 0:
        return

-    dim = world_dimension()
-
    # If this is a gradient, we find the gradient iname
+    dim = world_dimension()
    additional_inames = frozenset({})
    if accterm.new_indices is not None:
        for i in accterm.new_indices:
@@ -80,25 +85,55 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                from dune.perftool.pdelab.localoperator import grad_iname
                additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)}))

-    # Figure out the name of the accumulation variable
-    test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
-    ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
-    accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
+    # Get the degree of the element corresponding to this modified argument
+    mod_arg_expr = accterm.argument.expr
+    from ufl.classes import FunctionView, Argument
+    while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)) :
+        # If this has more than one operand we are potentially doing dangerous stuff
+        assert len(mod_arg_expr.ufl_operands) == 1
+        mod_arg_expr = mod_arg_expr.ufl_operands[0]
+    degree = mod_arg_expr.ufl_element()._degree
+    basis_size = degree + 1
+
+    def emit_sumfact_kernel(indices, restriction, insn_dep):
+        # Not implemented
+        if indices:
+            assert len(indices) <= 2
+
+        # Figure out the name of the accumulation variable
+        accum_idims = None
+        if indices is not None:
+            accum_idims = (indices[0],)
+        test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=accum_idims)
+        ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=accum_idims)
+        accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
+
+        # TODO: Adjust for stokes sumfact symdiff
+        jacobian_inames = tuple()
+        if accterm.is_jacobian:
+            jacobian_inames = visitor.inames

-    def emit_sumfact_kernel(i, restriction, insn_dep):
        # Construct the matrix sequence for this sum factorization
-        matrix_sequence = construct_basis_matrix_sequence(transpose=True,
-                                                          derivative=i if accterm.new_indices else None,
-                                                          facedir=get_facedir(accterm.argument.restriction),
-                                                          facemod=get_facemod(accterm.argument.restriction),
-                                                          )
-
+        matrix_sequence = construct_basis_matrix_sequence(
+            transpose=True,
+            derivative=indices[-1] if accterm.new_indices else None,
+            facedir=get_facedir(accterm.argument.restriction),
+            facemod=get_facemod(accterm.argument.restriction),
+            basis_size=basis_size)
+
+        # Avoid caching issues by passing the coeff_func_index
+        coeff_func_index = None
+        if indices and len(indices) == 2:
+            coeff_func_index = indices[0]
+
+        # TODO: Adapt preferred position for stokes sumfact symdiff
        sf = SumfactKernel(matrix_sequence=matrix_sequence,
                           restriction=(accterm.argument.restriction, restriction),
                           stage=3,
-                           preferred_position=i if accterm.new_indices else None,
-                           within_inames=visitor.inames,
+                           preferred_position=indices[-1] if accterm.new_indices else None,
                           accumvar=accum,
+                           within_inames=jacobian_inames,
+                           coeff_func_index=coeff_func_index,
                           )

        from dune.perftool.sumfact.vectorization import attach_vectorization_info
@@ -123,15 +158,21 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
            assignee = prim.Subscript(lp.TaggedVariable(temp, vsf.tag), pad)
            instruction(assignee=assignee,
                        expression=0,
-                        forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
+                        forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames),
                        forced_iname_deps_is_final=True,
                        tags=frozenset(["quadvec", "gradvec"])
                        )

        # Replace gradient iname with correct index for assignement
        replace_dict = {}
+
+        # If we have two indices the first belongs to the dimension
+        # and the second one to the derivative. We handle all these
+        # indices by hand and replace them accordingly.
+        if indices and len(indices) == 2:
+                replace_dict['idim_arg0'] = indices[0]
        for iname in additional_inames:
-            replace_dict[prim.Variable(iname)] = i
+            replace_dict[prim.Variable(iname)] = indices[-1]
        from dune.perftool.loopy.symbolic import substitute
        expression = substitute(pymbolic_expr, replace_dict)

@@ -141,9 +182,9 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
            timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
            post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
            dump_accumulate_timer(timer_name)
-            if(visitor.inames):
+            if(jacobian_inames):
                timer_dep = frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
-                                                   within_inames=frozenset(visitor.inames))})
+                                                   within_inames=frozenset(jacobian_inames))})

        # Determine dependencies
        from loopy.match import Or, Writes
@@ -157,7 +198,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                  vsf.quadrature_index(sf))
        contrib_dep = instruction(assignee=assignee,
                                  expression=expression,
-                                  forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
+                                  forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames),
                                  forced_iname_deps_is_final=True,
                                  tags=frozenset({"quadvec"}).union(vectag),
                                  depends_on=frozenset({deps}).union(timer_dep)
@@ -169,23 +210,22 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
        if get_option("instrumentation_level") >= 4:
            insn_dep = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
                                              depends_on=insn_dep,
-                                              within_inames=frozenset(visitor.inames))})
+                                              within_inames=frozenset(jacobian_inames))})

        inames = tuple(accum_iname((accterm.argument.restriction, restriction), mat.rows, i)
                       for i, mat in enumerate(vsf.matrix_sequence))

        # Collect the lfs and lfs indices for the accumulate call
        test_lfs.index = flatten_index(tuple(prim.Variable(i) for i in inames),
-                                       (basis_functions_per_direction(),) * dim,
+                                       (basis_size,) * dim,
                                       order="f"
                                       )

        # In the jacobian case, also determine the space for the ansatz space
-        rank = 2 if visitor.inames else 1
-        if rank == 2:
+        if accterm.is_jacobian:
            # TODO the next line should get its inames from
            # elsewhere. This is *NOT* robust (but works right now)
-            ansatz_lfs.index = flatten_index(tuple(prim.Variable(visitor.inames[i])
+            ansatz_lfs.index = flatten_index(tuple(prim.Variable(jacobian_inames[i])
                                                   for i in range(world_dimension())),
                                             (basis_functions_per_direction(),) * dim,
                                             order="f"
@@ -210,6 +250,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                               )

        if not get_option("fastdg"):
+            rank = 2 if accterm.is_jacobian else 1
            expr = prim.Call(PDELabAccumulationFunction(accum, rank),
                             (test_lfs.get_args() +
                              ansatz_lfs.get_args() +
@@ -218,27 +259,32 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                             )
            instruction(assignees=(),
                        expression=expr,
-                        forced_iname_deps=frozenset(inames + visitor.inames + vecinames),
+                        forced_iname_deps=frozenset(inames + vecinames + jacobian_inames),
                        forced_iname_deps_is_final=True,
                        depends_on=insn_dep,
                        )

        # Mark the transformation that moves the quadrature loop
        # inside the trialfunction loops for application
-        transform(nest_quadrature_loops, visitor.inames)
+        if accterm.is_jacobian:
+            transform(nest_quadrature_loops, jacobian_inames)

        return insn_dep

    # Extract the restrictions on argument-1:
    jac_restrictions = frozenset(tuple(ma.restriction for ma in
-                                       extract_modified_arguments(accterm.term, argnumber=1, do_index=True)))
+                                       extract_modified_arguments(accterm.term,
+                                                                  argnumber=1,
+                                                                  do_index=True)))
    if not jac_restrictions:
        jac_restrictions = frozenset({0})

    insn_dep = None
    for restriction in jac_restrictions:
        if accterm.new_indices:
-            for i in range(world_dimension()):
-                insn_dep = emit_sumfact_kernel(i, restriction, insn_dep)
+            shape = (world_dimension(),) * len(accterm.new_indices)
+            # Iterate over all combinations of indices for this shape
+            for indices in itertools.product(*map(range, shape)):
+                insn_dep = emit_sumfact_kernel(indices, restriction, insn_dep)
        else:
            insn_dep = emit_sumfact_kernel(None, restriction, insn_dep)
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -3,6 +3,7 @@
 NB: Basis evaluation is only needed for the trial function argument in jacobians, as the
 multiplication with the test function is part of the sum factorization kernel.
 """
+import itertools

 from dune.perftool.generation import (backend,
                                      domain,
@@ -50,11 +51,26 @@ def name_sumfact_base_buffer():
    return name


+def _basis_functions_per_direction(element, component):
+    """Number of basis functions per direction of a given component of an element"""
+    assert len(component.indices()) <= 1
+    if len(component.indices()) == 0:
+        degree = element.degree()
+    else:
+        index = component.indices()[0]._value
+        degree = element.sub_elements()[index].degree()
+    basis_size = degree + 1
+    return basis_size
+
+
 @kernel_cached
-def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, visitor):
+def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, visitor_indices):
    rawname = "gradu" + "_".join(str(c) for c in component)
    name = restricted_name(rawname, restriction)

+    # Number of basis functions per direction
+    basis_size = _basis_functions_per_direction(element, component)
+
    # Get a temporary for the gradient
    from ufl.functionview import select_subelement
    sub_element = select_subelement(element, component)
@@ -67,20 +83,30 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
    # if the positioning within a SIMD vectors coincides with the index!
    direct_indexing_is_possible = True

-    expressions = []
+    expressions = {}
    insn_dep = frozenset()
-    for i in range(world_dimension()):
+    for indices in itertools.product(*map(range, shape)):
+        # Do not consider derivatives of rank > 2
+        assert len(indices) <= 2
+
        # Construct the matrix sequence for this sum factorization
-        matrix_sequence = construct_basis_matrix_sequence(derivative=i,
+        matrix_sequence = construct_basis_matrix_sequence(derivative=indices[-1],
                                                          facedir=get_facedir(restriction),
                                                          facemod=get_facemod(restriction),
+                                                          basis_size=basis_size,
                                                          )

+        # Index needed to acces the right coefficient container for vector valued functions
+        coeff_func_index = None
+        if len(indices) == 2:
+            coeff_func_index = indices[0]
+
        # The sum factorization kernel object gathering all relevant information
        sf = SumfactKernel(matrix_sequence=matrix_sequence,
                           restriction=restriction,
-                           preferred_position=i,
+                           preferred_position=indices[-1],
                           coeff_func=coeff_func,
+                           coeff_func_index=coeff_func_index,
                           element=element,
                           component=component,
                           )
@@ -88,7 +114,7 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
        from dune.perftool.sumfact.vectorization import attach_vectorization_info
        vsf = attach_vectorization_info(sf)

-        if i != vsf.vec_index(sf):
+        if indices[-1] != vsf.vec_index(sf):
            direct_indexing_is_possible = False

        # Add a sum factorization kernel that implements the
@@ -97,33 +123,37 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
        from dune.perftool.sumfact.realization import realize_sum_factorization_kernel
        var, insn_dep = realize_sum_factorization_kernel(vsf.copy(insn_dep=vsf.insn_dep.union(insn_dep)))

-        expressions.append(prim.Subscript(var, vsf.quadrature_index(sf)))
+        expressions.update({indices: prim.Subscript(var, vsf.quadrature_index(sf))})

    # Check whether we want to return early with something that has the indexing
    # already handled! This happens with vectorization when the index coincides
    # with the position in the vector register.
    if direct_indexing_is_possible:
-        assert len(visitor.indices) == 1
-        return maybe_wrap_subscript(var, vsf.quadrature_index(sf, visitor.indices)), None
+        assert len(visitor_indices) == 1
+        return maybe_wrap_subscript(var, vsf.quadrature_index(sf, visitor_indices)), None

    # TODO this should be quite conditional!!!
-    for i, expr in enumerate(expressions):
+    for indices in itertools.product(*map(range, shape)):
        # Write solution from sumfactorization to gradient variable
-        assignee = prim.Subscript(prim.Variable(name), i)
+        assignee = prim.Subscript(prim.Variable(name), indices)
        instruction(assignee=assignee,
-                    expression=expr,
+                    expression=expressions[indices],
                    forced_iname_deps=frozenset(get_backend("quad_inames")()),
                    forced_iname_deps_is_final=True,
                    )

-    return prim.Variable(name), visitor.indices
+    return prim.Variable(name), visitor_indices


 @kernel_cached
-def pymbolic_coefficient(element, restriction, component, coeff_func, visitor):
+def pymbolic_coefficient(element, restriction, component, coeff_func, visitor_indices):
+    # Basis functions per direction
+    basis_size = _basis_functions_per_direction(element, component)
+
    # Construct the matrix sequence for this sum factorization
    matrix_sequence = construct_basis_matrix_sequence(facedir=get_facedir(restriction),
-                                                      facemod=get_facemod(restriction),)
+                                                      facemod=get_facemod(restriction),
+                                                      basis_size=basis_size)

    sf = SumfactKernel(matrix_sequence=matrix_sequence,
                       restriction=restriction,
@@ -142,7 +172,7 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor):

    return prim.Subscript(var,
                          vsf.quadrature_index(sf)
-                          ), visitor.indices
+                          ), visitor_indices


 @iname
@@ -175,8 +205,12 @@ def evaluate_basis(element, name, restriction):

    # Add the missing direction on facedirs by evaluating at either 0 or 1
    if facedir is not None:
+        # TODO: Does not work for systems!
+        from tabulation import polynomial_degree
+        degree = polynomial_degree()
+
        facemod = get_facemod(restriction)
-        prod = prod + (prim.Call(PolynomialLookup(name_polynomials(), False),
+        prod = prod + (prim.Call(PolynomialLookup(name_polynomials(degree), False),
                                 (prim.Variable(inames[facedir]), facemod)),)

    # Issue the product
@@ -222,8 +256,12 @@ def evaluate_reference_gradient(element, name, restriction):
            if i != facedir:
                prod.append(BasisTabulationMatrix(derivative=d == i).pymbolic((prim.Variable(quadinamemapping[i]), prim.Variable(inames[i]))))
        if facedir is not None:
+            # TODO: Does not work for systems!
+            from tabulation import polynomial_degree
+            degree = polynomial_degree()
+
            facemod = get_facemod(restriction)
-            prod.append(prim.Call(PolynomialLookup(name_polynomials(), facedir == d),
+            prod.append(prim.Call(PolynomialLookup(name_polynomials(degree), facedir == d),
                                  (prim.Variable(inames[facedir]), facemod)),)

        assignee = prim.Subscript(prim.Variable(name), (d,))

--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -2,6 +2,8 @@
 The code that triggers the creation of the necessary code constructs
 to realize a sum factorization kernel
 """
+from ufl.functionview import select_subelement
+from ufl import VectorElement, TensorElement

 from dune.perftool.generation import (barrier,
                                      dump_accumulate_timer,
@@ -17,8 +19,9 @@ from dune.perftool.loopy.buffer import (get_buffer_temporary,
                                        switch_base_storage,
                                        )
 from dune.perftool.pdelab.argument import pymbolic_coefficient
+from dune.perftool.pdelab.basis import shape_as_pymbolic
 from dune.perftool.pdelab.geometry import world_dimension
-from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound
+from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_child, name_leaf_lfs
 from dune.perftool.options import get_option
 from dune.perftool.pdelab.signatures import assembler_routine_name
 from dune.perftool.sumfact.permutation import (sumfact_permutation_strategy,
@@ -71,9 +74,31 @@ def _realize_sum_factorization_kernel(sf):
        def _write_input(inputsf, index=0):
            # Write initial coefficients into buffer
            lfs = name_lfs(inputsf.element, inputsf.restriction, inputsf.component)
+
+            sub_element = select_subelement(inputsf.element, inputsf.component)
+            shape = sub_element.value_shape() + (inputsf.element.cell().geometric_dimension(),)
+
+            if isinstance(sub_element, (VectorElement, TensorElement)):
+                # Could be 0 but shouldn't be None
+                assert inputsf.coeff_func_index is not None
+
+                lfs_pym = lfs_child(lfs,
+                                    (inputsf.coeff_func_index,),
+                                    shape=shape_as_pymbolic(shape[:-1]),
+                                    symmetry=inputsf.element.symmetry())
+
+            leaf_element = sub_element
+            if isinstance(sub_element, (VectorElement, TensorElement)):
+                leaf_element = sub_element.sub_elements()[0]
+
+            lfs = name_leaf_lfs(leaf_element, inputsf.restriction)
+
            basisiname = sumfact_iname(name_lfs_bound(lfs), "basis")
            container = inputsf.coeff_func(inputsf.restriction)
-            coeff = pymbolic_coefficient(container, lfs, basisiname)
+            if isinstance(sub_element, (VectorElement, TensorElement)):
+                coeff = pymbolic_coefficient(container, lfs_pym, basisiname)
+            else:
+                coeff = pymbolic_coefficient(container, lfs, basisiname)
            assignee = prim.Subscript(prim.Variable(input_setup), (prim.Variable(basisiname),) + (index,))
            instruction(assignee=assignee,
                        expression=coeff,

--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -29,6 +29,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
                 input=None,
                 insn_dep=frozenset(),
                 coeff_func=None,
+                 coeff_func_index=None,
                 element=None,
                 component=None,
                 accumvar=None,
@@ -97,6 +98,8 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
        padding: a set of slots in the input temporary to be padded
        index: The slot in the input temporary dedicated to this kernel
        coeff_func: The function to call to get the input coefficient
+        coeff_func_index: Index to get the right input coefficient
+            (needed for systems of PDEs)
        element: The UFL element
        component: The treepath to the correct component of above element
        accumvar: The accumulation variable to accumulate into
@@ -159,7 +162,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
        work on the same input coefficient (and are suitable for simultaneous
        treatment because of that)
        """
-        return (self.restriction, self.stage, self.coeff_func, self.element, self.component, self.accumvar)
+        return (self.restriction, self.stage, self.coeff_func, self.coeff_func_index, self.element, self.component, self.accumvar)

    #
    # Some convenience methods to extract information about the sum factorization kernel
@@ -354,7 +357,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
        work on the same input coefficient (and are suitable for simultaneous
        treatment because of that)
        """
-        return (self.restriction, self.stage, self.coeff_func, self.element, self.component, self.accumvar)
+        return (self.restriction, self.stage, self.coeff_func, self.coeff_func_index, self.element, self.component, self.accumvar)

    #
    # Deduce all data fields of normal sum factorization kernels from the underlying kernels
@@ -384,6 +387,11 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
        assert len(set(k.coeff_func for k in self.kernels)) == 1
        return self.kernels[0].coeff_func

+    @property
+    def coeff_func_index(self):
+        assert len(set(k.coeff_func_index for k in self.kernels)) == 1
+        return self.kernels[0].coeff_func_index
+
    @property
    def element(self):
        assert len(set(k.element for k in self.kernels)) == 1

--- a/python/dune/perftool/sumfact/tabulation.py
+++ b/python/dune/perftool/sumfact/tabulation.py
@@ -86,13 +86,13 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord):
            return self.basis_size

    def pymbolic(self, indices):
-        name = "{}{}Theta{}{}_qp{}_dof_{}".format("face{}_".format(self.face) if self.face is not None else "",
-                                                  "d" if self.derivative else "",
-                                                  "T" if self.transpose else "",
-                                                  "_slice{}".format(self.slice_index) if self.slice_size is not None else "",
-                                                  self.quadrature_size,
-                                                  self.basis_size,
-                                                  )
+        name = "{}{}Theta{}{}_qp{}_dof{}".format("face{}_".format(self.face) if self.face is not None else "",
+                                                 "d" if self.derivative else "",
+                                                 "T" if self.transpose else "",
+                                                 "_slice{}".format(self.slice_index) if self.slice_size is not None else "",
+                                                 self.quadrature_size,
+                                                 self.basis_size,
+                                                 )
        define_theta(name, self)
        return prim.Subscript(prim.Variable(name), indices)

@@ -162,7 +162,7 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase):
    def pymbolic(self, indices):
        assert len(indices) == 3

-        # Check whether we can realize this by broadcasting the values of a simple tabulation 
+        # Check whether we can realize this by broadcasting the values of a simple tabulation
        if len(set(self.tabs)) == 1:
            theta = self.tabs[0].pymbolic(indices[:-1])
            return prim.Call(ExplicitVCLCast(np.float64, vector_width=len(self.tabs)), (theta,))
@@ -254,10 +254,9 @@ def name_oned_quadrature_points():


 @class_member(classtag="operator")
-def typedef_polynomials(name):
+def typedef_polynomials(name, degree):
    range_field = lop_template_range_field()
    domain_field = name_domain_field()
-    degree = polynomial_degree()

    include_file("dune/pdelab/finiteelementmap/qkdg.hh", filetag="operatorfile")

@@ -272,21 +271,21 @@ def typedef_polynomials(name):
                                                                                          degree)


-def type_polynomials():
-    name = "Polynomials1D"
-    typedef_polynomials(name)
+def type_polynomials(degree):
+    name = "Polynomials1D_Degree{}".format(degree)
+    typedef_polynomials(name, degree)
    return name


 @class_member(classtag="operator")
-def define_polynomials(name):
-    polynomials_type = type_polynomials()
+def define_polynomials(name, degree):
+    polynomials_type = type_polynomials(degree)
    return "{} {};".format(polynomials_type, name)


-def name_polynomials():
-    name = "poly"
-    define_polynomials(name)
+def name_polynomials(degree):
+    name = "poly_degree{}".format(degree)
+    define_polynomials(name, degree)
    return name


@@ -330,7 +329,8 @@ def define_theta(name, tabmat, additional_indices=(), width=None):
    qp = name_oned_quadrature_points()
    qw = name_oned_quadrature_weights()
    sort_quadrature_points_weights(qp, qw)
-    polynomials = name_polynomials()
+    degree = tabmat.basis_size-1
+    polynomials = name_polynomials(degree)

    shape = (tabmat.rows, tabmat.cols)
    dim_tags = "f,f"
@@ -367,13 +367,14 @@ def define_theta(name, tabmat, additional_indices=(), width=None):
                )


-def construct_basis_matrix_sequence(transpose=False, derivative=None, facedir=None, facemod=None):
+def construct_basis_matrix_sequence(transpose=False, derivative=None, facedir=None, facemod=None, basis_size=None):
    dim = world_dimension()
    result = [None] * dim

    for i in range(dim):
        quadrature_size = quadrature_points_per_direction()
-        basis_size = basis_functions_per_direction()
+        if basis_size is None:
+            basis_size = basis_functions_per_direction()

        onface = None
        if facedir == i:

--- a/python/dune/perftool/ufl/extract_accumulation_terms.py
+++ b/python/dune/perftool/ufl/extract_accumulation_terms.py
@@ -31,10 +31,21 @@ class AccumulationTerm(Record):
                 new_indices=None
                 ):
        assert (isinstance(argument, ModifiedArgument) or argument is None)
+
+        # Test if this expression belongs to a jacobian method
+        all_jacobian_args = extract_modified_arguments(term,
+                                                       argnumber=1,
+                                                       do_index=False,
+                                                       do_gradient=False)
+        is_jacobian = False
+        if all_jacobian_args:
+            is_jacobian = True
+
        Record.__init__(self,
                        term=term,
                        argument=argument,
-                        new_indices=new_indices
+                        new_indices=new_indices,
+                        is_jacobian=is_jacobian
                        )

    def indexed_test_arg(self):

--- a/test/sumfact/stokes/stokes.mini
+++ b/test/sumfact/stokes/stokes.mini
@@ -13,5 +13,8 @@ extension = vtu
 [formcompiler]
 numerical_jacobian = 1, 0 | expand num
 exact_solution_expression = g
-compare_l2errorsquared = 1e-10
+compare_l2errorsquared = 1e-12
 sumfact = 1
+
+# Disable symdiff test for now
+{__exec_suffix} == symdiff | exclude