diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt
index 22200ad351968fbeef9ddb19da07db127477f505..e6529ffd88247905b4ba85cc3d67e2b2893f817e 100644
--- a/applications/CMakeLists.txt
+++ b/applications/CMakeLists.txt
@@ -1 +1,2 @@
+add_subdirectory(convection_diffusion)
 add_subdirectory(poisson_dg)
diff --git a/applications/convection_diffusion/CMakeLists.txt b/applications/convection_diffusion/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8cbf0d4463491995042fc66aecae4e1f240dc537
--- /dev/null
+++ b/applications/convection_diffusion/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Convection-diffusion application: form in conv_diff_dg.ufl, variants in conv_diff_dg.mini.
+dune_add_formcompiler_system_test(
+  UFLFILE conv_diff_dg.ufl BASENAME app_conv_diff INIFILE conv_diff_dg.mini
+  NO_TESTS  # NOTE(review): presumably builds the executables without registering tests - confirm
+)
+# Make the SLURM batch script from the source directory available in the build directory.
+dune_symlink_to_source_files(FILES donkey.sbatch)
diff --git a/applications/convection_diffusion/conv_diff_dg.mini b/applications/convection_diffusion/conv_diff_dg.mini
new file mode 100644
index 0000000000000000000000000000000000000000..f8357485b280ae77c18bfee823a1ad3d0453ad11
--- /dev/null
+++ b/applications/convection_diffusion/conv_diff_dg.mini
@@ -0,0 +1,50 @@
+__name = app_conv_diff_{__exec_suffix}
+__exec_suffix = deg{formcompiler.ufl_variants.degree}
+#__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}
+
+#opcount_suffix = opcount, nonopcount | expand opcount
+
+# Calculate the size of the grid so that each rank holds about mbperrank MB of degrees of freedom (100 MB/rank in the commented-out setup)
+# Input parameters
+dim = 3
+#mbperrank = 100
+#ranks = 16
+mbperrank = .1
+ranks = 1
+floatingbytes = 8
+
+# Metaini Calculations
+memperrank = {mbperrank} * 1048576 | eval
+dofsperdir = {formcompiler.ufl_variants.degree} + 1 | eval
+celldofs = {dofsperdir} ** {dim} | eval
+cellsperrank = {memperrank} / ({floatingbytes} * {celldofs}) | eval
+cellsperdir = {cellsperrank} ** (1/{dim}) | eval | toint
+firstdircells = {ranks} * {cellsperdir} | eval
+dimminusone = {dim} - 1 | eval
+ones = 1 | repeat {dimminusone}
+otherdircells = {cellsperdir} | repeat {dimminusone}
+
+# Setup the grid!
+extension = 1.0 | repeat {dim}
+cells = {firstdircells} {otherdircells}
+partitioning = {ranks} {ones}
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+fastdg = 1
+sumfact = 1
+vectorize_quad = 1
+vectorize_grads = 1
+#instrumentation_level = 2
+#opcounter = 1, 0 | expand opcount
+#time_opcounter = 0, 1 | expand opcount
+exact_solution_expression = g
+compare_l2errorsquared = 1e-6
+
+[formcompiler.ufl_variants]
+cell = hexahedron
+#degree = 2, 3, 4, 5, 6, 7, 8, 9, 10 | expand
+degree = 2
diff --git a/applications/convection_diffusion/conv_diff_dg.ufl b/applications/convection_diffusion/conv_diff_dg.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..38e8dc7334af54b167a51955186f63660af12e4c
--- /dev/null
+++ b/applications/convection_diffusion/conv_diff_dg.ufl
@@ -0,0 +1,29 @@
+x = SpatialCoordinate(cell)  # "cell" and "degree" are not defined in this file; presumably injected from [formcompiler.ufl_variants] - confirm
+# Manufactured problem -div(A grad u) + c*u = f; g is used as boundary data in the ds terms below (3D only: indices 0..2)
+g = x[0]*x[0] + x[1]*x[1] + x[2]*x[2]
+I = Identity(3)
+A = as_matrix([[x[i]*x[j] + I[i,j] for j in range(3)] for i in range(3)])  # A = x x^T + I, symmetric
+f = -6.  # equals -div(A grad g) + c*g for the g and A above and c = 10 below, so u = g solves the problem
+c = 10.  # coefficient of the u*v term in the volume integral
+
+V = FiniteElement("DG", cell, degree)
+
+u = TrialFunction(V)
+v = TestFunction(V)
+
+n = FacetNormal(cell)('+')  # facet normal restricted to the '+' side
+
+gamma = 1.0  # weight of the jump(u)*jump(v) and (u - g)*v penalty terms
+theta = -1.0  # weight of the terms pairing u (or g) with the flux A*grad(v) of the test function
+# Residual: volume integral (dx), interior facet integrals (dS), exterior facet integrals (ds) with data g
+r = (inner(A*grad(u), grad(v)) + (c*u-f)*v)*dx \
+  + inner(n, A*avg(grad(u)))*jump(v)*dS \
+  + gamma*jump(u)*jump(v)*dS \
+  + theta*jump(u)*inner(A*avg(grad(v)), n)*dS \
+  - inner(n, A*grad(u))*v*ds \
+  + gamma*u*v*ds \
+  - theta*u*inner(A*grad(v), n)*ds \
+  + theta*g*inner(A*grad(v), n)*ds \
+  - gamma*g*v*ds
+
+forms = [r]  # NOTE(review): presumably the list of forms the form compiler picks up - confirm
diff --git a/applications/convection_diffusion/donkey.sbatch b/applications/convection_diffusion/donkey.sbatch
new file mode 100755
index 0000000000000000000000000000000000000000..abf8b1aa2795fcb894d0e7dccb5bf9f12d33e0ae
--- /dev/null
+++ b/applications/convection_diffusion/donkey.sbatch
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# SLURM batch script for the convection-diffusion DG benchmark.
+#
+# It runs app_conv_diff_deg<N>_opcount / app_conv_diff_deg<N>_nonopcount and
+# the matching .ini files from the working directory. Presumably these are
+# what conv_diff_dg.mini produces once its commented-out degree and opcount
+# expansions are enabled -- confirm before submitting.
+#
+# NOTE: sbatch stops reading #SBATCH directives at the first executable
+# command, so every directive has to precede the module loads below.
+
+# Set a name for the job
+#SBATCH -J conv_diff
+
+# Number of processes
+#SBATCH -n 16
+
+# Choose the SLURM partition (sinfo for overview)
+#SBATCH -p haswell16c
+
+# Each process needs two PUs: circumvent hyperthreading
+#SBATCH -c 2
+
+# Load modules
+ml gcc/6.2
+ml intelmpi
+ml openblas
+ml metis
+ml suitesparse
+
+# Pin processes to cores
+# (Possible values: socket, core)
+SRUNOPT="--cpu_bind=verbose,core"
+
+# Polynomial degrees to benchmark (expanded unquoted so the list splits into words)
+DEGREES="2 3 4 5 6 7 8 9 10"
+
+# Run the opcount executables
+for DEG in $DEGREES; do
+    srun $SRUNOPT ./app_conv_diff_deg${DEG}_opcount app_conv_diff_deg${DEG}_opcount.ini
+done
+
+# Run the timing executables
+# (two passes over the whole degree list)
+for PASS in 1 2; do
+    for DEG in $DEGREES; do
+        srun $SRUNOPT ./app_conv_diff_deg${DEG}_nonopcount app_conv_diff_deg${DEG}_nonopcount.ini
+    done
+done
diff --git a/python/dune/perftool/pdelab/tensors.py b/python/dune/perftool/pdelab/tensors.py
index 0b5789ce256b7e6080aa8c0eb1aa1b9bb2ab2d8f..315e3ceccee0d4d203bef033c901043cff4efa34 100644
--- a/python/dune/perftool/pdelab/tensors.py
+++ b/python/dune/perftool/pdelab/tensors.py
@@ -17,7 +17,10 @@ def define_list_tensor(name, expr, visitor, stack=()):
             define_list_tensor(name, child, visitor, stack=stack + (i,))
         else:
             instruction(assignee=prim.Subscript(prim.Variable(name), stack + (i,)),
-                        expression=visitor.call(child))
+                        expression=visitor.call(child),
+                        forced_iname_deps=frozenset(visitor.interface.quadrature_inames()),
+                        tags=frozenset({"quad"}),
+                        )
 
 
 @kernel_cached