Merge branch 'feature/sumfact-poisson' into 'master'

Feature/sumfact poisson See merge request !59

Merge branch 'feature/sumfact-poisson' into 'master'
10149193 · Dominic Kempf · 6e868bbc · 1288f744 · 10149193 · 10149193
Commit 10149193 authored 8 years ago by Dominic Kempf
--- a/python/dune/perftool/error.py
+++ b/python/dune/perftool/error.py
 """ Some error classes for dune-perftool """

+
 class PerftoolError(Exception):
    pass


--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -4,7 +4,9 @@ from dune.perftool.sumfact.quadrature import (quadrature_inames,

 from dune.perftool.sumfact.basis import (lfs_inames,
                                         pymbolic_basis,
+                                         pymbolic_reference_gradient,
                                         pymbolic_trialfunction,
+                                         pymbolic_trialfunction_gradient,
                                         )

 from dune.perftool.pdelab import PDELabInterface
@@ -17,6 +19,12 @@ class SumFactInterface(PDELabInterface):
    def pymbolic_basis(self, element, restriction, number):
        return pymbolic_basis(element, restriction, number)

+    def pymbolic_reference_gradient(self, element, restriction, number):
+        return pymbolic_reference_gradient(element, restriction, number)
+
+    def pymbolic_trialfunction_gradient(self, element, restriction, component):
+        return pymbolic_trialfunction_gradient(element, restriction, component)
+
    def pymbolic_trialfunction(self, element, restriction, component):
        return pymbolic_trialfunction(element, restriction, component)


--- a/python/dune/perftool/sumfact/amatrix.py
+++ b/python/dune/perftool/sumfact/amatrix.py
@@ -193,7 +193,7 @@ def sort_quadrature_points_weights():


 @constructor_block("operator")
-def construct_theta(name, transpose):
+def construct_theta(name, transpose, derivative):
    # Make sure that the quadrature points are sorted
    sort_quadrature_points_weights()

@@ -204,11 +204,13 @@ def construct_theta(name, transpose):
    polynomials = name_polynomials()
    qp = name_oned_quadrature_points()

-    access = "j,i" if transpose else "i,j"
+    # access = "j,i" if transpose else "i,j"
+    basispol = "dp" if derivative else "p"
+    polynomial_access = "i,{}[j]".format(qp) if transpose else "j,{}[i]".format(qp)

    return ["for (std::size_t i=0; i<{}; i++){{".format(shape[0]),
            "  for (std::size_t j=0; j<{}; j++){{".format(shape[1]),
-            "    {}.colmajoraccess({}) = {}.p(j,{}[i]);".format(name, access, polynomials, qp),
+            "    {}.colmajoraccess(i,j) = {}.{}({});".format(name, polynomials, basispol, polynomial_access),
            "  }",
            "}"]

@@ -228,10 +230,10 @@ def type_theta():


 @class_member("operator")
-def define_theta(name, shape, transpose):
+def define_theta(name, shape, transpose, derivative):
    theta_type = type_theta()
    initializer_list(name, [str(axis) for axis in shape], classtag="operator")
-    construct_theta(name, transpose)
+    construct_theta(name, transpose, derivative)
    return "{} {};".format(theta_type, name)


@@ -239,7 +241,7 @@ def name_theta():
    name = "Theta"
    shape = (quadrature_points_per_direction(), basis_functions_per_direction())
    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
-    define_theta(name, shape, False)
+    define_theta(name, shape, False, False)
    return name


@@ -247,5 +249,21 @@ def name_theta_transposed():
    name = "ThetaT"
    shape = (basis_functions_per_direction(), quadrature_points_per_direction())
    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
-    define_theta(name, shape, True)
+    define_theta(name, shape, True, False)
+    return name
+
+
+def name_dtheta():
+    name = "dTheta"
+    shape = (quadrature_points_per_direction(), basis_functions_per_direction())
+    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
+    define_theta(name, shape, False, True)
+    return name
+
+
+def name_dtheta_transposed():
+    name = "dThetaT"
+    shape = (basis_functions_per_direction(), quadrature_points_per_direction())
+    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
+    define_theta(name, shape, True, True)
    return name
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -6,6 +6,7 @@ multiplication with the test function is part of the sum factorization kernel.
 from dune.perftool.generation import (backend,
                                      cached,
                                      domain,
+                                      get_counter,
                                      get_global_context_value,
                                      iname,
                                      instruction,
@@ -14,6 +15,7 @@ from dune.perftool.generation import (backend,
 from dune.perftool.sumfact.amatrix import (AMatrix,
                                           basis_functions_per_direction,
                                           ColMajorAccess,
+                                           name_dtheta,
                                           name_theta,
                                           quadrature_points_per_direction,
                                           )
@@ -31,6 +33,100 @@ from pytools import product
 import pymbolic.primitives as prim


+def name_sumfact_base_buffer():
+    count = get_counter('sumfact_base_buffer')
+    name = "buffer_{}".format(str(count))
+    return name
+
+
+@cached
+def sumfact_evaluate_coefficient_gradient(element, name, restriction, component):
+    # First we determine the rank of the tensor we are talking about
+    from ufl.functionview import select_subelement
+    sub_element = select_subelement(element, component)
+    rank = len(sub_element.value_shape()) + 1
+
+    # We do then set some variables accordingly
+    shape = sub_element.value_shape() + (element.cell().geometric_dimension(),)
+    shape_impl = ('arr',) * rank
+
+    from dune.perftool.pdelab.geometry import dimension_iname
+    idims = tuple(dimension_iname(count=i) for i in range(rank))
+    leaf_element = sub_element
+    from ufl import VectorElement, TensorElement
+    if isinstance(sub_element, (VectorElement, TensorElement)):
+        leaf_element = sub_element.sub_elements()[0]
+
+    # and proceed to call the necessary generator functions
+    temporary_variable(name, shape=shape, shape_impl=shape_impl)
+    from dune.perftool.pdelab.spaces import name_lfs
+    lfs = name_lfs(element, restriction, component)
+    from dune.perftool.pdelab.basis import pymbolic_reference_gradient
+    basis = pymbolic_reference_gradient(leaf_element, restriction, 0, context='trialgrad')
+    from dune.perftool.tools import get_pymbolic_indices
+    index, _ = get_pymbolic_indices(basis)
+    if isinstance(sub_element, (VectorElement, TensorElement)):
+        lfs = lfs_child(lfs, idims[:-1], shape=shape_as_pymbolic(shape[:-1]), symmetry=element.symmetry())
+
+    # Calculate values with sumfactorization
+    theta = name_theta()
+    dtheta = name_dtheta()
+    rows = quadrature_points_per_direction()
+    cols = basis_functions_per_direction()
+    theta_matrix = AMatrix(theta, rows, cols)
+    dtheta_matrix = AMatrix(dtheta, rows, cols)
+
+    # TODO:
+    # - This only covers rank 1
+    # - Avoid setting up whole gradient if only one component is needed?
+    # Get geometric dimension
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    buffers = []
+    for i in range(dim):
+        a_matrices = [theta_matrix] * dim
+        a_matrices[i] = dtheta_matrix
+        a_matrices = tuple(a_matrices)
+
+        # Do stage 1
+        buffer_name = name_sumfact_base_buffer()
+        initialize_buffer(buffer_name,
+                          base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
+                          num=2
+                          )
+
+        insn_dep = setup_theta(element, restriction, component, a_matrices, buffer_name)
+        var, _ = sum_factorization_kernel(a_matrices,
+                                          buffer_name,
+                                          insn_dep=frozenset({insn_dep}),
+                                          )
+
+        buffers.append(var)
+
+    for i, buf in enumerate(buffers):
+        # Write solution from sumfactorization to gradient variable
+        from pymbolic.primitives import Subscript, Variable
+        from dune.perftool.generation import get_backend
+        assignee = Subscript(Variable(name), i)
+        expression = Subscript(Variable(buf), tuple(Variable(i) for i in quadrature_inames()))
+        instruction(assignee=assignee,
+                    expression=expression,
+                    forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset(idims)),
+                    forced_iname_deps_is_final=True,
+                    )
+
+
+@cached
+def pymbolic_trialfunction_gradient(element, restriction, component):
+    rawname = "gradu" + "_".join(str(c) for c in component)
+    name = restricted_name(rawname, restriction)
+    from dune.perftool.pdelab.argument import name_coefficientcontainer
+    container = name_coefficientcontainer(restriction)
+    sumfact_evaluate_coefficient_gradient(element, name, restriction, component)
+    from pymbolic.primitives import Variable
+    return Variable(name)
+
+
 @cached
 def pymbolic_trialfunction(element, restriction, component):
    theta = name_theta()
@@ -39,16 +135,17 @@ def pymbolic_trialfunction(element, restriction, component):
    a_matrix = AMatrix(theta, rows, cols)
    # TODO: this is 2D only
    a_matrices = (a_matrix, a_matrix)
+    buffer_name = name_sumfact_base_buffer()

    # Do stage 1
-    initialize_buffer("buffer",
+    initialize_buffer(buffer_name,
                      base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
                      num=2
                      )

-    insn_dep = setup_theta(element, restriction, component, a_matrices)
+    insn_dep = setup_theta(element, restriction, component, a_matrices, buffer_name)
    var, _ = sum_factorization_kernel(a_matrices,
-                                      "buffer",
+                                      buffer_name,
                                      insn_dep=frozenset({insn_dep}),
                                      )

@@ -102,3 +199,53 @@ def pymbolic_basis(element, restriction, number):
    evaluate_basis(element, name, restriction)

    return prim.Variable(name)
+
+
+@backend(interface="evaluate_grad")
+@cached
+def evaluate_reference_gradient(element, name, restriction):
+    # from dune.perftool.pdelab.basis import name_leaf_lfs
+    # lfs = name_leaf_lfs(element, restriction)
+    # from dune.perftool.pdelab.spaces import name_lfs_bound
+    from dune.perftool.pdelab.geometry import name_dimension
+    temporary_variable(
+        name,
+        shape=(name_dimension(),))
+    quad_inames = quadrature_inames()
+    inames = lfs_inames(element, restriction)
+    assert(len(quad_inames) == len(inames))
+
+    theta = name_theta()
+    dtheta = name_dtheta()
+
+    # Get geometric dimension
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+
+    for i in range(dim):
+        calls = [prim.Call(ColMajorAccess(theta), (prim.Variable(i), prim.Variable(j)))
+                 for (i, j) in zip(quad_inames, inames)]
+        calls[i] = prim.Call(ColMajorAccess(dtheta), (prim.Variable(quad_inames[i]), prim.Variable(inames[i])))
+        calls = tuple(calls)
+
+        # assignee = prim.Subscript(prim.Variable(name), tuple(prim.Variable(0)))
+        assignee = prim.Subscript(prim.Variable(name), (i,))
+        # assignee = prim.Variable(name)
+        expression = prim.Product(calls)
+
+        instruction(assignee=assignee,
+                    expression=expression,
+                    forced_iname_deps=frozenset(quad_inames + inames),
+                    forced_iname_deps_is_final=True,
+                    )
+
+
+def pymbolic_reference_gradient(element, restriction, number):
+    assert number == 1
+
+    # TODO: Change name?
+    name = "js_{}".format(FEM_name_mangling(element))
+    name = restricted_name(name, restriction)
+    evaluate_reference_gradient(element, name, restriction)
+
+    return prim.Variable(name)
--- a/python/dune/perftool/sumfact/quadrature.py
+++ b/python/dune/perftool/sumfact/quadrature.py
@@ -7,7 +7,7 @@ from dune.perftool.generation import (backend,
                                      temporary_variable,
                                      )

-from dune.perftool.sumfact.amatrix import (name_number_of_basis_functions_per_direction,
+from dune.perftool.sumfact.amatrix import (name_number_of_quadrature_points_per_direction,
                                           name_oned_quadrature_points,
                                           name_oned_quadrature_weights,
                                           )
@@ -71,7 +71,7 @@ def pymbolic_base_weight():
 @iname
 def sumfact_quad_iname(d, context):
    name = "quad_{}_{}".format(context, d)
-    domain(name, name_number_of_basis_functions_per_direction())
+    domain(name, name_number_of_quadrature_points_per_direction())
    return name



--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
+import copy
+
+from pymbolic.mapper.substitutor import substitute
+
 from dune.perftool.pdelab.argument import (name_accumulation_variable,
                                           name_coefficientcontainer,
                                           pymbolic_coefficient,
@@ -55,10 +59,10 @@ def sumfact_iname(bound, _type):
    return name


-def setup_theta(element, restriction, component, a_matrices):
+def setup_theta(element, restriction, component, a_matrices, buffer_name):
    number_basis = product(mat.cols for mat in a_matrices)
    shape = (number_basis,)
-    inp = get_buffer_temporary("buffer",
+    inp = get_buffer_temporary(buffer_name,
                               shape=shape)
    silenced_warning('read_no_write({})'.format(inp))

@@ -74,8 +78,9 @@ def setup_theta(element, restriction, component, a_matrices):


 def name_test_function_contribution(test):
+    count = get_counter('sumfact_contribution_counter')
    grad = "grad_" if test.reference_grad else ""
-    return restricted_name("contrib_{}phi".format(grad), test.restriction)
+    return restricted_name("contrib_{}phi_{}".format(grad, str(count)), test.restriction)


 @backend(interface="accum_insn", name="sumfact")
@@ -89,73 +94,113 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
    formdata = get_global_context_value('formdata')
    dim = formdata.geometric_dimension

-    # Get the a matrices needed for this accumulation term
-    theta_transposed = name_theta_transposed()
-    rows = basis_functions_per_direction()
-    cols = quadrature_points_per_direction()
-    a_matrix = AMatrix(theta_transposed, rows, cols)
-    a_matrices = (a_matrix, a_matrix)
-
-    # Get a matrix that holds the result of the pymbolic_expr at all quadrature points
-    buffer = name_test_function_contribution(accterm.argument)
-    temp = initialize_buffer(buffer,
-                             base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
-                             num=2
-                             ).get_temporary(shape=(quadrature_points_per_direction(),) * dim)
-
-    # Issue an instruction in the quadrature loop that fills the buffer
-    # with the evaluation of the contribution at all quadrature points
-    insn_dep = instruction(assignee=Subscript(Variable(temp), tuple(Variable(i) for i in quadrature_inames())),
-                           expression=pymbolic_expr,
-                           forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
-                           forced_iname_deps_is_final=True,
-                           )
-
-    # Add a sum factorization kernel that implements the multiplication
-    # with the test function (step 3)
-    result, insn_dep = sum_factorization_kernel(a_matrices,
-                                                buffer,
-                                                insn_dep=frozenset({insn_dep}),
-                                                additional_inames=frozenset(visitor.inames),
-                                                )
-
-    inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices)
-
-    # Collect the lfs and lfs indices for the accumulate call
-    test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
-    test_lfs.index = flatten_index(tuple(Variable(i) for i in inames),
-                                   (basis_functions_per_direction(),) * dim
-                                   )
-
-    # In the jacobian case, also determine the space for the ansatz space
-    ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
-    rank = 2 if visitor.inames else 1
-    if rank == 2:
-        ansatz_lfs.index = flatten_index(tuple(Variable(i) for i in visitor.inames),
-                                         (basis_functions_per_direction(),) * dim
-                                         )
-
-    # Construct the expression representing "{r,jac}.accumulate(..)"
-    accum = name_accumulation_variable()
-    expr = Call(PDELabAccumulationFunction(accum, rank),
-                (ansatz_lfs.get_args() +
-                 test_lfs.get_args() +
-                 (Subscript(Variable(result), tuple(Variable(i) for i in inames)),)
-                 )
-                )
-
-    instruction(assignees=(),
-                expression=expr,
-                forced_iname_deps=frozenset(inames + visitor.inames),
-                forced_iname_deps_is_final=True,
-                depends_on=insn_dep,
-                )
-
-    # Mark the transformation that moves the quadrature loop inside the trialfunction loops for application
-    transform(nest_quadrature_loops, visitor.inames)
-
-
-def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additional_inames=frozenset({})):
+    # Collect buffers we need
+    buffers = []
+
+    # If this is a gradient, we find the gradient iname
+    additional_inames = frozenset({})
+    if accterm.argument.index:
+        for i in accterm.argument.index._indices:
+            if i not in visitor.dimension_indices:
+                from dune.perftool.pdelab.localoperator import grad_iname
+                additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)}))
+                for i in range(dim):
+                    buffers.append(name_test_function_contribution(accterm.argument))
+    else:
+        buffers.append(name_test_function_contribution(accterm.argument))
+
+    # TODO covers only 2D
+    for i, buf in enumerate(buffers):
+        # Get the a matrices needed for this accumulation term
+        theta_transposed = name_theta_transposed()
+        rows = basis_functions_per_direction()
+        cols = quadrature_points_per_direction()
+        theta_matrix = AMatrix(theta_transposed, rows, cols)
+
+        # If this is a gradient we need different matrices
+        if accterm.argument.index:
+            from dune.perftool.sumfact.amatrix import name_dtheta_transposed
+            dtheta_transposed = name_dtheta_transposed()
+            rows = basis_functions_per_direction()
+            cols = quadrature_points_per_direction()
+            dtheta_matrix = AMatrix(dtheta_transposed, rows, cols)
+
+            a_matrices = [theta_matrix] * dim
+            a_matrices[i] = dtheta_matrix
+            a_matrices = tuple(a_matrices)
+
+        else:
+            a_matrices = (theta_matrix, theta_matrix)
+
+        temp = initialize_buffer(buf,
+                                 base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
+                                 num=2
+                                 ).get_temporary(shape=(quadrature_points_per_direction(),) * dim,
+                                                 dim_tags="f,f")
+
+        # Replace gradient iname with correct index for assignement
+        replace_dict = {}
+        expression = copy.deepcopy(pymbolic_expr)
+        for iname in additional_inames:
+            replace_dict[Variable(iname)] = i
+        expression = substitute(expression, replace_dict)
+
+        # Issue an instruction in the quadrature loop that fills the buffer
+        # with the evaluation of the contribution at all quadrature points
+        assignee = Subscript(Variable(temp), tuple(Variable(i) for i in quadrature_inames()))
+        insn_dep = instruction(assignee=assignee,
+                               expression=expression,
+                               forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
+                               forced_iname_deps_is_final=True,
+                               )
+
+        # Add a sum factorization kernel that implements the multiplication
+        # with the test function (step 3)
+        result, insn_dep = sum_factorization_kernel(a_matrices,
+                                                    buf,
+                                                    insn_dep=frozenset({insn_dep}),
+                                                    additional_inames=frozenset(visitor.inames),
+                                                    )
+
+        inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices)
+
+        # Collect the lfs and lfs indices for the accumulate call
+        test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
+        test_lfs.index = flatten_index(tuple(Variable(i) for i in inames),
+                                       (basis_functions_per_direction(),) * dim,
+                                       order="f"
+                                       )
+
+        # In the jacobian case, also determine the space for the ansatz space
+        ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
+        rank = 2 if visitor.inames else 1
+        if rank == 2:
+            ansatz_lfs.index = flatten_index(tuple(Variable(i) for i in visitor.inames),
+                                             (basis_functions_per_direction(),) * dim,
+                                             order="f"
+                                             )
+
+        # Construct the expression representing "{r,jac}.accumulate(..)"
+        accum = name_accumulation_variable()
+        expr = Call(PDELabAccumulationFunction(accum, rank),
+                    (ansatz_lfs.get_args() +
+                     test_lfs.get_args() +
+                     (Subscript(Variable(result), tuple(Variable(i) for i in inames)),)
+                     )
+                    )
+
+        instruction(assignees=(),
+                    expression=expr,
+                    forced_iname_deps=frozenset(inames + visitor.inames),
+                    forced_iname_deps_is_final=True,
+                    depends_on=insn_dep,
+                    )
+
+        # Mark the transformation that moves the quadrature loop inside the trialfunction loops for application
+        transform(nest_quadrature_loops, visitor.inames)
+
+
+def sum_factorization_kernel(a_matrices, buf, insn_dep=frozenset({}), additional_inames=frozenset({})):
    """
    Calculate a sum factorization matrix product.

@@ -168,7 +213,7 @@ def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additio
    a_matrices: An iterable of AMatrix instances
        The list of tensors to be applied to the input.
        Order of application is from 0 up.
-    buffer: A string identifying the flip flop buffer in use
+    buf: A string identifying the flip flop buffer in use
        for intermediate results. The memory is expected to be
        pre-initialized with the input.
    insn_dep: an instruction ID that the first issued instruction
@@ -180,21 +225,27 @@ def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additio
        # as a column-major matrix. In later iteration of the amatrix loop
        # this reinterprets the output of the previous iteration.
        inp_shape = (a_matrix.cols, product(mat.rows for mat in a_matrices[:l]) * product(mat.cols for mat in a_matrices[l + 1:]))
-        inp = get_buffer_temporary(buffer,
+        inp = get_buffer_temporary(buf,
                                   shape=inp_shape,
                                   dim_tags="f,f")

        # The input temporary will only be read from, so we need to silence the loopy warning
        silenced_warning('read_no_write({})'.format(inp))

-        switch_base_storage(buffer)
+        switch_base_storage(buf)

        # Get a temporary that interprets the base storage of the output
        # as row-major matrix
        out_shape = (a_matrix.rows, product(mat.rows for mat in a_matrices[:l]) * product(mat.cols for mat in a_matrices[l + 1:]))
-        out = get_buffer_temporary(buffer,
+
+        # Do not rotate in last step
+        out_tags = "c,c"
+        if l == len(a_matrices) - 1:
+            out_tags = "f,f"
+
+        out = get_buffer_temporary(buf,
                                   shape=out_shape,
-                                   dim_tags="c,c")
+                                   dim_tags=out_tags)

        # Get the inames needed for one matrix-matrix multiplication
        i = sumfact_iname(out_shape[0], "row")
@@ -217,4 +268,8 @@ def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additio
                                          )
                              })

+    out = get_buffer_temporary(buf,
+                               shape=out_shape,
+                               dim_tags="c,c")
+
    return out, insn_dep
--- a/test/sumfact/poisson/CMakeLists.txt
+++ b/test/sumfact/poisson/CMakeLists.txt
-# # 1. Poisson Test Case: source f, bc g
-# dune_add_formcompiler_system_test(UFLFILE poisson.ufl
-#                                   BASENAME sumfact_poisson
-#                                   INIFILE poisson.mini
-#                                   )
-#
+# 1. Poisson Test Case with order 1
+dune_add_formcompiler_system_test(UFLFILE poisson_order1.ufl
+                                  BASENAME sumfact_poisson_order1
+                                  INIFILE poisson_order1.mini
+                                  )
+
+
+# 1. Poisson Test Case with order 2
+dune_add_formcompiler_system_test(UFLFILE poisson_order2.ufl
+                                  BASENAME sumfact_poisson_order2
+                                  INIFILE poisson_order2.mini
+                                  )
+
 # # 2. Poisson Test Case: DG, f + pure dirichlet
-#dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl
+# dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl
 #                                  BASENAME sumfact_poisson_dg
 #                                  INIFILE poisson_dg.mini
 #                                  )
--- a/test/sumfact/poisson/poisson_dg_only_volume.mini
+++ b/test/sumfact/poisson/poisson_dg_only_volume.mini
+__name = sumfact_poisson_dg_only_volume_{__exec_suffix}
+
+__exec_suffix = {__num_suffix}_{__sumfact_suffix}
+__num_suffix  = numdiff, symdiff |expand num
+__sumfact_suffix = normal, sumfact | expand sumf
+
+cells = 1 1
+extension = 1. 1.
+printresidual = 1
+printmatrix = 1
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+numerical_jacobian = 1, 0 | expand num
+sumfact = 0, 1 | expand sumf
+print_transformations = 1
+print_transformations_dir = .
--- a/test/sumfact/poisson/poisson_dg_only_volume.ufl
+++ b/test/sumfact/poisson/poisson_dg_only_volume.ufl
+cell = "quadrilateral"
+
+f = Expression("return -2.0*x.size();", cell=cell)
+g = Expression("return x.two_norm2();", on_intersection=True, cell=cell)
+
+V = FiniteElement("DG", cell, 1)
+
+u = TrialFunction(V)
+v = TestFunction(V)
+
+n = FacetNormal(cell)('+')
+
+gamma = 1.0
+theta = 1.0
+
+r = inner(grad(u), grad(v))*dx \
+  - f*v*dx
+
+forms = [r]
\ No newline at end of file
--- a/test/sumfact/poisson/poisson.mini
+++ b/test/sumfact/poisson/poisson.mini
-__name = sumfact_poisson_{__exec_suffix}
+__name = sumfact_poisson_order1_{__exec_suffix}
 __exec_suffix = numdiff, symdiff | expand num

-cells = 1 1
+cells = 8 8
 extension = 1. 1.

 [wrapper.vtkcompare]
@@ -12,5 +12,5 @@ extension = vtu
 [formcompiler]
 numerical_jacobian = 1, 0 | expand num
 exact_solution_expression = g
-compare_l2errorsquared = 1e-6
+compare_l2errorsquared = 1e-4
 sumfact = 1
--- a/test/sumfact/poisson/poisson.ufl
+++ b/test/sumfact/poisson/poisson.ufl
--- a/test/sumfact/poisson/poisson_order2.mini
+++ b/test/sumfact/poisson/poisson_order2.mini
+__name = sumfact_poisson_order2_{__exec_suffix}
+__exec_suffix = numdiff, symdiff | expand num
+
+cells = 8 8
+extension = 1. 1.
+
+[wrapper.vtkcompare]
+name = {__name}
+reference = poisson_ref
+extension = vtu
+
+[formcompiler]
+numerical_jacobian = 1, 0 | expand num
+exact_solution_expression = g
+compare_l2errorsquared = 1e-8
+sumfact = 1
--- a/test/sumfact/poisson/poisson_order2.ufl
+++ b/test/sumfact/poisson/poisson_order2.ufl
+cell = "quadrilateral"
+
+f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());", cell=cell)
+g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());", cell=cell)
+
+V = FiniteElement("CG", cell, 2, dirichlet_expression=g)
+u = TrialFunction(V)
+v = TestFunction(V)
+
+forms = [(inner(grad(u), grad(v)) - f*v)*dx]