diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt
index 22200ad351968fbeef9ddb19da07db127477f505..e6529ffd88247905b4ba85cc3d67e2b2893f817e 100644
--- a/applications/CMakeLists.txt
+++ b/applications/CMakeLists.txt
@@ -1 +1,2 @@
+add_subdirectory(convection_diffusion)
 add_subdirectory(poisson_dg)
diff --git a/applications/convection_diffusion/CMakeLists.txt b/applications/convection_diffusion/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8cbf0d4463491995042fc66aecae4e1f240dc537
--- /dev/null
+++ b/applications/convection_diffusion/CMakeLists.txt
@@ -0,0 +1,7 @@
+dune_add_formcompiler_system_test(UFLFILE conv_diff_dg.ufl
+                                  BASENAME app_conv_diff
+                                  INIFILE conv_diff_dg.mini
+                                  NO_TESTS
+                                  )
+
+dune_symlink_to_source_files(FILES donkey.sbatch)
diff --git a/applications/convection_diffusion/conv_diff_dg.mini b/applications/convection_diffusion/conv_diff_dg.mini
new file mode 100644
index 0000000000000000000000000000000000000000..7476dd5e0913c620236371bfeb378d9e92ef2a9b
--- /dev/null
+++ b/applications/convection_diffusion/conv_diff_dg.mini
@@ -0,0 +1,46 @@
+__name = app_conv_diff_{__exec_suffix}
+__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}
+
+opcount_suffix = opcount, nonopcount | expand opcount
+
+# Calculate the size of the grid to equlibritate it to 100 MB/rank
+# Input parameters
+dim = 3
+mbperrank = 100
+ranks = 16
+floatingbytes = 8
+
+# Metaini Calculations
+memperrank = {mbperrank} * 1048576 | eval
+dofsperdir = {formcompiler.ufl_variants.degree} + 1 | eval
+celldofs = {dofsperdir} ** {dim} | eval
+cellsperrank = {memperrank} / ({floatingbytes} * {celldofs}) | eval
+cellsperdir = {cellsperrank} ** (1/{dim}) | eval | toint
+firstdircells = {ranks} * {cellsperdir} | eval
+dimminusone = {dim} - 1 | eval
+ones = 1 | repeat {dimminusone}
+otherdircells = {cellsperdir} | repeat {dimminusone}
+
+# Setup the grid!
+extension = 1.0 | repeat {dim}
+cells = {firstdircells} {otherdircells}
+partitioning = {ranks} {ones}
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+fastdg = 1
+sumfact = 1
+vectorize_quad = 1
+vectorize_grads = 1
+instrumentation_level = 2
+opcounter = 1, 0 | expand opcount
+time_opcounter = 0, 1 | expand opcount
+exact_solution_expression = g
+compare_l2errorsquared = 1e-6
+
+[formcompiler.ufl_variants]
+cell = hexahedron
+degree = 2, 3, 4, 5, 6, 7, 8, 9, 10 | expand
diff --git a/applications/convection_diffusion/conv_diff_dg.ufl b/applications/convection_diffusion/conv_diff_dg.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..38e8dc7334af54b167a51955186f63660af12e4c
--- /dev/null
+++ b/applications/convection_diffusion/conv_diff_dg.ufl
@@ -0,0 +1,29 @@
+x = SpatialCoordinate(cell)
+
+g = x[0]*x[0] + x[1]*x[1] + x[2]*x[2]
+I = Identity(3)
+A = as_matrix([[x[i]*x[j] + I[i,j] for j in range(3)] for i in range(3)])
+f = -6.
+c = 10.
+
+V = FiniteElement("DG", cell, degree)
+
+u = TrialFunction(V)
+v = TestFunction(V)
+
+n = FacetNormal(cell)('+')
+
+gamma = 1.0
+theta = -1.0
+
+r = (inner(A*grad(u), grad(v)) + (c*u-f)*v)*dx \
+  + inner(n, A*avg(grad(u)))*jump(v)*dS \
+  + gamma*jump(u)*jump(v)*dS \
+  + theta*jump(u)*inner(A*avg(grad(v)), n)*dS \
+  - inner(n, A*grad(u))*v*ds \
+  + gamma*u*v*ds \
+  - theta*u*inner(A*grad(v), n)*ds \
+  + theta*g*inner(A*grad(v), n)*ds \
+  - gamma*g*v*ds
+
+forms = [r]
diff --git a/applications/convection_diffusion/donkey.sbatch b/applications/convection_diffusion/donkey.sbatch
new file mode 100755
index 0000000000000000000000000000000000000000..9b9a9749658a49191634a2362114d37bae384c0d
--- /dev/null
+++ b/applications/convection_diffusion/donkey.sbatch
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Load modules
+ml gcc/6.2
+ml intelmpi
+ml openblas
+ml metis
+ml suitesparse
+
+# Set a name for the job
+#SBATCH -J conv_diff
+
+# Number of processes
+#SBATCH -n 16
+
+# Choose the SLURM partition (sinfo for overview)
+#SBATCH -p haswell16c
+
+# Each process needs two PUs: circumvent hyperthreading
+#SBATCH -c 2
+
+# Pin processes to cores
+# (Possible values: socket, core)
+SRUNOPT="--cpu_bind=verbose,core"
+
+# Run the opcount executables
+srun $SRUNOPT ./app_conv_diff_deg2_opcount app_conv_diff_deg2_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg3_opcount app_conv_diff_deg3_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg4_opcount app_conv_diff_deg4_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg5_opcount app_conv_diff_deg5_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg6_opcount app_conv_diff_deg6_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg7_opcount app_conv_diff_deg7_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg8_opcount app_conv_diff_deg8_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg9_opcount app_conv_diff_deg9_opcount.ini
+srun $SRUNOPT ./app_conv_diff_deg10_opcount app_conv_diff_deg10_opcount.ini
+
+# Run the timing executables
+COUNT=0
+while [ $COUNT -lt 10 ]; do
+    srun $SRUNOPT ./app_conv_diff_deg2_nonopcount app_conv_diff_deg2_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg3_nonopcount app_conv_diff_deg3_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg4_nonopcount app_conv_diff_deg4_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg5_nonopcount app_conv_diff_deg5_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg6_nonopcount app_conv_diff_deg6_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg7_nonopcount app_conv_diff_deg7_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg8_nonopcount app_conv_diff_deg8_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg9_nonopcount app_conv_diff_deg9_nonopcount.ini
+    srun $SRUNOPT ./app_conv_diff_deg10_nonopcount app_conv_diff_deg10_nonopcount.ini
+    COUNT=$((COUNT + 1))
+done
diff --git a/applications/poisson_dg/donkey.sbatch b/applications/poisson_dg/donkey.sbatch
index abf8b1aa2795fcb894d0e7dccb5bf9f12d33e0ae..075bb5c85f5cca6574c40fdd469201967bcf90e3 100755
--- a/applications/poisson_dg/donkey.sbatch
+++ b/applications/poisson_dg/donkey.sbatch
@@ -36,7 +36,7 @@ srun $SRUNOPT ./app_poisson_dg_deg10_opcount app_poisson_dg_3d_deg10_opcount.ini
 
 # Run the timing executables
 COUNT=0
-while [ $COUNT -lt 2 ]; do
+while [ $COUNT -lt 10 ]; do
     srun $SRUNOPT ./app_poisson_dg_deg2_nonopcount app_poisson_dg_3d_deg2_nonopcount.ini
     srun $SRUNOPT ./app_poisson_dg_deg3_nonopcount app_poisson_dg_3d_deg3_nonopcount.ini
     srun $SRUNOPT ./app_poisson_dg_deg4_nonopcount app_poisson_dg_3d_deg4_nonopcount.ini
diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py
index 863038390e37322c615de38feb35fe563101174c..52f707ce70455bbb1dd3cc6e954aeaa13daed48d 100644
--- a/python/dune/perftool/compile.py
+++ b/python/dune/perftool/compile.py
@@ -68,7 +68,10 @@ def read_ufl(uflfile):
         name = "{}_debug".format(name)
         pyname = "{}.py".format(name)
         print(pyname)
-        pycode = "#!/usr/bin/env python\nfrom dune.perftool.ufl.execution import *\nset_level(DEBUG)\n" + uflcode
+        pycode = "#!/usr/bin/env python\nfrom dune.perftool.ufl.execution import *\nset_level(DEBUG)\n"
+        for k, v in ini.get("formcompiler.ufl_variants", {}).items():
+            pycode = pycode + "{} = {}\n".format(k, repr(type_guessing(v)))
+        pycode = pycode + uflcode
         with file(pyname, "w") as f:
             f.write(pycode)
         raise SyntaxError("Not a valid ufl file, dumped a debug script: {}".format(pyname))
diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py
index 9004b2f2bb946ecc0d328f933d919205a174ab10..78070a10be4fef8c535a9c3dad6f0a02fe323edf 100644
--- a/python/dune/perftool/loopy/transformations/collect_rotate.py
+++ b/python/dune/perftool/loopy/transformations/collect_rotate.py
@@ -167,7 +167,7 @@ def collect_vector_data_rotate(knl):
                 #
 
                 # 1. Rotating the input data
-                knl = add_vector_view(knl, quantity)
+                knl = add_vector_view(knl, quantity, flatview=True)
                 include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile")
                 new_insns.append(lp.CallInstruction((),  # assignees
                                                     prim.Call(prim.Variable("transpose_reg"),
@@ -188,7 +188,7 @@ def collect_vector_data_rotate(knl):
             else:
                 # Add a vector view to this quantity
                 expr, = quantities[quantity]
-                knl = add_vector_view(knl, quantity)
+                knl = add_vector_view(knl, quantity, flatview=True)
                 replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
                                                       (prim.Variable("vec_index"), prim.Variable(new_iname)),
                                                       )
@@ -289,7 +289,7 @@ def collect_vector_data_rotate(knl):
     for insn in insns:
         # Get a vector view of the lhs expression
         lhsname = get_pymbolic_basename(insn.assignee)
-        knl = add_vector_view(knl, lhsname, pad_to=vec_size)
+        knl = add_vector_view(knl, lhsname, pad_to=vec_size, flatview=True)
         lhsname = get_vector_view_name(lhsname)
 
         if rotating:
diff --git a/python/dune/perftool/loopy/transformations/vectorview.py b/python/dune/perftool/loopy/transformations/vectorview.py
index e0d78e140975d5b0637272597487b609b05777a3..7fc3ff8f2ea840be0f4c3841ba701176822365c8 100644
--- a/python/dune/perftool/loopy/transformations/vectorview.py
+++ b/python/dune/perftool/loopy/transformations/vectorview.py
@@ -16,7 +16,7 @@ def get_vector_view_name(tmpname):
     return tmpname + "_vec"
 
 
-def add_vector_view(knl, tmpname, pad_to=None):
+def add_vector_view(knl, tmpname, pad_to=None, flatview=False):
     """
     Kernel transformation to add a vector view temporary
     that interprets the same memory as another temporary
@@ -53,11 +53,26 @@ def add_vector_view(knl, tmpname, pad_to=None):
     if pad_to:
         size = (size // pad_to + 1) * pad_to
 
+    # Some vectorview are intentionally flat! (e.g. the output buffers of
+    # sum factorization kernels
+    if flatview:
+        shape = (size, vecsize)
+        dim_tags = "c,vec"
+    else:
+        shape = temp.shape
+        # This works around a loopy weirdness (which might as well be a bug)
+        # TODO: investigate this!
+        if len(shape) == 1:
+            shape = (1, vecsize)
+            dim_tags = "c,vec"
+        else:
+            dim_tags = temp.dim_tags[:-1] + ("vec",)
+
     # Now add a vector view temporary
     vecname = tmpname + "_vec"
     temporaries[vecname] = lp.TemporaryVariable(vecname,
-                                                dim_tags="c,vec",
-                                                shape=(size, vecsize),
+                                                dim_tags=dim_tags,
+                                                shape=shape,
                                                 base_storage=bsname,
                                                 dtype=np.float64,
                                                 scope=lp.temp_var_scope.PRIVATE,
diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py
index 76f894bfe0a5cef91d547557173ea15b18a25c6a..8529eb1e55186f52c379337be47165800cf21cee 100644
--- a/python/dune/perftool/pdelab/__init__.py
+++ b/python/dune/perftool/pdelab/__init__.py
@@ -24,7 +24,7 @@ from dune.perftool.pdelab.parameter import (cell_parameter_function,
                                             intersection_parameter_function,
                                             )
 from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_weight,
-                                             pymbolic_quadrature_position_in_cell,
+                                             pymbolic_quadrature_position,
                                              quadrature_inames,
                                              )
 from dune.perftool.pdelab.spaces import (lfs_inames,
@@ -94,8 +94,8 @@ class PDELabInterface(object):
     # Geometry related generator functions
     #
 
-    def pymbolic_spatial_coordinate(self, restriction):
-        return to_global(pymbolic_quadrature_position_in_cell(restriction))
+    def pymbolic_spatial_coordinate(self):
+        return to_global(pymbolic_quadrature_position())
 
     def name_facet_jacobian_determinant(self):
         return name_facet_jacobian_determinant()
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 50f4b2a00ca3c7818bca9619cb58808ed2855a92..f6ffbec814cd02ef5053ffc2659209535b3d61bc 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -336,7 +336,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
     ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
 
     from dune.perftool.pdelab.argument import name_accumulation_variable
-    accumvar = name_accumulation_variable((ansatz_lfs.get_restriction() + test_lfs.get_restriction()))
+    accumvar = name_accumulation_variable((test_lfs.get_restriction() + ansatz_lfs.get_restriction()))
 
     predicates = boundary_predicates(accterm.term, measure, subdomain_id, visitor)
 
@@ -345,7 +345,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
     from dune.perftool.pdelab.argument import PDELabAccumulationFunction
     from pymbolic.primitives import Call
     expr = Call(PDELabAccumulationFunction(accumvar, rank),
-                (ansatz_lfs.get_args() + test_lfs.get_args() + (pymbolic_expr,))
+                (test_lfs.get_args() + ansatz_lfs.get_args() + (pymbolic_expr,))
                 )
 
     from dune.perftool.generation import instruction
diff --git a/python/dune/perftool/pdelab/tensors.py b/python/dune/perftool/pdelab/tensors.py
index 16cf6bb6a4d7bb3c1815ca372f3cdfdd53617167..315e3ceccee0d4d203bef033c901043cff4efa34 100644
--- a/python/dune/perftool/pdelab/tensors.py
+++ b/python/dune/perftool/pdelab/tensors.py
@@ -10,10 +10,17 @@ import pymbolic.primitives as prim
 import numpy as np
 
 
-def define_list_tensor(name, expr, visitor):
+def define_list_tensor(name, expr, visitor, stack=()):
     for i, child in enumerate(expr.ufl_operands):
-        instruction(assignee=prim.Subscript(prim.Variable(name), (i,)),
-                    expression=visitor.call(child))
+        from ufl.classes import ListTensor
+        if isinstance(child, ListTensor):
+            define_list_tensor(name, child, visitor, stack=stack + (i,))
+        else:
+            instruction(assignee=prim.Subscript(prim.Variable(name), stack + (i,)),
+                        expression=visitor.call(child),
+                        forced_iname_deps=frozenset(visitor.interface.quadrature_inames()),
+                        tags=frozenset({"quad"}),
+                        )
 
 
 @kernel_cached
@@ -22,6 +29,7 @@ def pymbolic_list_tensor(expr, visitor):
     temporary_variable(name,
                        shape=expr.ufl_shape,
                        dtype=np.float64,
+                       managed=True,
                        )
     define_list_tensor(name, expr, visitor)
     return prim.Variable(name)
diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py
index 60b120ce95e9650b9a358632f6d3bad7249a4d25..0389416b5999e96b7e303434f07cab7e07a824b5 100644
--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -3,7 +3,7 @@ from dune.perftool.pdelab.argument import (name_applycontainer,
                                            )
 from dune.perftool.sumfact.quadrature import (quadrature_inames,
                                               quadrature_weight,
-                                              pymbolic_quadrature_position_in_cell,
+                                              pymbolic_quadrature_position,
                                               )
 
 from dune.perftool.sumfact.basis import (lfs_inames,
@@ -53,6 +53,6 @@ class SumFactInterface(PDELabInterface):
     def pymbolic_quadrature_weight(self):
         return quadrature_weight()
 
-    def pymbolic_spatial_coordinate(self, restriction):
+    def pymbolic_spatial_coordinate(self):
         from dune.perftool.pdelab.geometry import to_global
-        return to_global(pymbolic_quadrature_position_in_cell(restriction))
+        return to_global(pymbolic_quadrature_position())
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index ea22fd5d03950c65f2655a99ede0bce491ca5678..d56e080b5856367170f882b2df43fbea7e27f738 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -249,7 +249,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                              )
 
         # Construct the expression representing "{r,jac}.accumulate(..)"
-        accum = name_accumulation_variable(ansatz_lfs.get_restriction() + test_lfs.get_restriction())
+        accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
 
         # Determine the expression to accumulate with. This depends on the vectorization strategy!
         result = prim.Subscript(result, tuple(prim.Variable(i) for i in inames))
@@ -287,7 +287,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                 accum = accum + ".data()"
                 size = basis_functions_per_direction() ** world_dimension()
                 globalarg(accum, dtype=np.float64, shape=(size, size), managed=True)
-                assignee = prim.Subscript(prim.Variable(accum), (ansatz_lfs.index, test_lfs.index))
+                assignee = prim.Subscript(prim.Variable(accum), (test_lfs.index, ansatz_lfs.index))
                 expression = prim.Sum((assignee, result))
                 instruction(assignee=assignee,
                             expression=expression,
@@ -298,8 +298,8 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
         # Default: Generate accumulation instructions
         else:
             expr = Call(PDELabAccumulationFunction(accum, rank),
-                        (ansatz_lfs.get_args() +
-                         test_lfs.get_args() +
+                        (test_lfs.get_args() +
+                         ansatz_lfs.get_args() +
                          (result,)
                          )
                         )
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index f8129bf225c12f80bd0df0a546eb2a27189df129..f8d1f7d0ed94025a84f851395a8b20c0bd0988c4 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -315,7 +315,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
         if get_global_context_value("driver", False):
             return prim.Variable("x")
         else:
-            return self.interface.pymbolic_spatial_coordinate(self.restriction)
+            return self.interface.pymbolic_spatial_coordinate()
 
     def facet_normal(self, o):
         # The normal must be restricted to be well-defined
diff --git a/test/poisson/CMakeLists.txt b/test/poisson/CMakeLists.txt
index 844fdce8d338ebfcb59b916d21d97d87829f0d60..e1925db7fffc33641255e37ec022298ef27ea642 100644
--- a/test/poisson/CMakeLists.txt
+++ b/test/poisson/CMakeLists.txt
@@ -60,6 +60,18 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_quadrilateral.ufl
                                   INIFILE poisson_dg_quadrilateral.mini
                                   )
 
+# 10. Poisson Test Case with a full permeability tensor
+dune_add_formcompiler_system_test(UFLFILE poisson_tensor.ufl
+                                  BASENAME poisson_tensor
+                                  INIFILE poisson_tensor.mini
+                                  )
+
+# 11. Poisson Test Case: DG with full permeability tensor
+dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl
+                                  BASENAME poisson_dg_tensor
+                                  INIFILE poisson_dg_tensor.mini
+                                  )
+
 # the reference vtk file
 add_executable(poisson_dg_ref reference_main.cc)
 set_target_properties(poisson_dg_ref PROPERTIES EXCLUDE_FROM_ALL 1)
diff --git a/test/poisson/poisson_dg.ufl b/test/poisson/poisson_dg.ufl
index 06f3eb0f953695ccb9bf948fa947080901956e97..0653a89cf5a7eb4d0a3ec8a6e44df485ec2d53bb 100644
--- a/test/poisson/poisson_dg.ufl
+++ b/test/poisson/poisson_dg.ufl
@@ -13,7 +13,7 @@ v = TestFunction(V)
 n = FacetNormal(cell)('+')
 
 gamma = 1.0
-theta = 1.0
+theta = -1.0
 
 r = inner(grad(u), grad(v))*dx \
   + inner(n, avg(grad(u)))*jump(v)*dS \
diff --git a/test/poisson/poisson_dg_tensor.mini b/test/poisson/poisson_dg_tensor.mini
new file mode 100644
index 0000000000000000000000000000000000000000..f3463f5c998ee7baf07904bbb31c0b0661cef230
--- /dev/null
+++ b/test/poisson/poisson_dg_tensor.mini
@@ -0,0 +1,17 @@
+__name = poisson_dg_tensor_{__exec_suffix}
+__exec_suffix = numdiff, symdiff | expand num
+
+lowerleft = 0.0 0.0
+extension = 1.0 1.0
+cells = 32 32
+elementType = quadrilateral
+
+[wrapper.vtkcompare]
+name = {__name}
+reference = poisson_dg_ref
+extension = vtu
+
+[formcompiler]
+numerical_jacobian = 1, 0 | expand num
+exact_solution_expression = g
+compare_l2errorsquared = 2e-5
diff --git a/test/poisson/poisson_dg_tensor.ufl b/test/poisson/poisson_dg_tensor.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..f88ec5673658fd0b63c63b98329b90384805c24a
--- /dev/null
+++ b/test/poisson/poisson_dg_tensor.ufl
@@ -0,0 +1,32 @@
+cell = quadrilateral
+
+x = SpatialCoordinate(cell)
+
+I = Identity(2)
+A = as_matrix([[x[i]*x[j] + I[i,j] for j in range(2)] for i in range(2)])
+g = x[0]**2 + x[1]**2
+c = 8.
+f = -4.
+
+V = FiniteElement("DG", cell, 1)
+
+u = TrialFunction(V)
+v = TestFunction(V)
+
+n = FacetNormal(cell)('+')
+
+gamma = 1.0
+theta = -1.0
+
+r = (inner(A*grad(u), grad(v)) + c*u*v)*dx \
+  + inner(n, A*avg(grad(u)))*jump(v)*dS \
+  + gamma*jump(u)*jump(v)*dS \
+  + theta*jump(u)*inner(A*avg(grad(v)), n)*dS \
+  - inner(n, A*grad(u))*v*ds \
+  + gamma*u*v*ds \
+  - theta*u*inner(A*grad(v), n)*ds \
+  - f*v*dx \
+  + theta*g*inner(A*grad(v), n)*ds \
+  - gamma*g*v*ds
+
+forms = [r]
diff --git a/test/poisson/poisson_tensor.mini b/test/poisson/poisson_tensor.mini
new file mode 100644
index 0000000000000000000000000000000000000000..dd922fef55a7080284d9be134d7a1aae0dfabdce
--- /dev/null
+++ b/test/poisson/poisson_tensor.mini
@@ -0,0 +1,17 @@
+__name = poisson_tensor_{__exec_suffix}
+__exec_suffix = numdiff, symdiff | expand num
+
+lowerleft = 0.0 0.0
+upperright = 1.0 1.0
+elements = 32 32
+elementType = quadrilateral
+
+[wrapper.vtkcompare]
+name = {__name}
+reference = poisson_ref
+extension = vtu
+
+[formcompiler]
+numerical_jacobian = 1, 0 | expand num
+exact_solution_expression = g
+compare_l2errorsquared = 1e-6
diff --git a/test/poisson/poisson_tensor.ufl b/test/poisson/poisson_tensor.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..c8d936acb7e49dfd265c21ea6e4ea591281f679c
--- /dev/null
+++ b/test/poisson/poisson_tensor.ufl
@@ -0,0 +1,15 @@
+cell = triangle
+
+x = SpatialCoordinate(cell)
+
+I = Identity(2)
+A = as_matrix([[x[i]*x[j] + I[i,j] for j in range(2)] for i in range(2)])
+g = x[0]**2 + x[1]**2
+c = 8.
+f = -4.
+
+V = FiniteElement("CG", cell, 1, dirichlet_expression=g)
+u = TrialFunction(V)
+v = TestFunction(V)
+
+forms = [(inner(A*grad(u), grad(v)) + c*u*v -f*v)*dx]
diff --git a/test/sumfact/poisson/CMakeLists.txt b/test/sumfact/poisson/CMakeLists.txt
index aab2e764b019fe0281c20075a819a29b52f26640..062fc23d1d26a03922ec30a8f32164c5c344eda2 100644
--- a/test/sumfact/poisson/CMakeLists.txt
+++ b/test/sumfact/poisson/CMakeLists.txt
@@ -38,3 +38,9 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_2d.ufl
                                   BASENAME sumfact_poisson_fastdg
                                   INIFILE poisson_fastdg.mini
                                   )
+
+# Poisson DG with a full tensor
+dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl
+                                  BASENAME sumfact_poisson_dg_tensor
+                                  INIFILE poisson_dg_tensor.mini
+                                  )
diff --git a/test/sumfact/poisson/poisson_dg_tensor.mini b/test/sumfact/poisson/poisson_dg_tensor.mini
new file mode 100644
index 0000000000000000000000000000000000000000..3f561fa46fcb494d812646d96daf6f11c4b72605
--- /dev/null
+++ b/test/sumfact/poisson/poisson_dg_tensor.mini
@@ -0,0 +1,22 @@
+__name = sumfact_poisson_dg_tensor_{__exec_suffix}
+__exec_suffix = {quadvec_suffix}_{gradvec_suffix}
+
+quadvec_suffix = quadvec, nonquadvec | expand quad
+gradvec_suffix = gradvec, nongradvec | expand grad
+
+cells = 8 8 8
+extension = 1. 1. 1.
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+sumfact = 1
+exact_solution_expression = g
+compare_l2errorsquared = 3e-4
+vectorize_quad = 1, 0 | expand quad
+vectorize_grads = 1, 0 | expand grad
+
+[formcompiler.ufl_variants]
+degree = 2
diff --git a/test/sumfact/poisson/poisson_dg_tensor.ufl b/test/sumfact/poisson/poisson_dg_tensor.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..ceecebc5eba11f95b7bac462631e61955d4ab69a
--- /dev/null
+++ b/test/sumfact/poisson/poisson_dg_tensor.ufl
@@ -0,0 +1,32 @@
+cell = hexahedron
+
+x = SpatialCoordinate(cell)
+
+I = Identity(3)
+A = as_matrix([[x[i]*x[j] + I[i,j] for j in range(3)] for i in range(3)])
+g = x[0]**2 + x[1]**2 + x[2]**2
+c = 10.
+f = -6.
+
+V = FiniteElement("DG", cell, 1)
+
+u = TrialFunction(V)
+v = TestFunction(V)
+
+n = FacetNormal(cell)('+')
+
+gamma = 1.0
+theta = -1.0
+
+r = (inner(A*grad(u), grad(v)) + c*u*v)*dx \
+  + inner(n, A*avg(grad(u)))*jump(v)*dS \
+  + gamma*jump(u)*jump(v)*dS \
+  + theta*jump(u)*inner(A*avg(grad(v)), n)*dS \
+  - inner(n, A*grad(u))*v*ds \
+  + gamma*u*v*ds \
+  - theta*u*inner(A*grad(v), n)*ds \
+  - f*v*dx \
+  + theta*g*inner(A*grad(v), n)*ds \
+  - gamma*g*v*ds
+
+forms = [r]