diff --git a/.gitmodules b/.gitmodules
index 4cf4ba72af8959c4b35e4f3e734481602dde8ba8..1733ea4f5c7666f27dfc88945612f491bc6cc4df 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,7 +3,7 @@
 	url = https://gitlab.tiker.net/inducer/loopy.git
 [submodule "python/ufl"]
 	path = python/ufl
-	url = https://parcomp-git.iwr.uni-heidelberg.de/dominic/ufl.git
+	url = https://bitbucket.org/fenics-project/ufl.git
 [submodule "python/pymbolic"]
 	path = python/pymbolic
 	url = https://github.com/inducer/pymbolic.git
diff --git a/patches/apply_patches.sh b/patches/apply_patches.sh
index 2c38492b19c08452f7607b084f855782bb3cd35f..5fa3ab5e28c162fb391e0b66ec5aec066d4b903a 100755
--- a/patches/apply_patches.sh
+++ b/patches/apply_patches.sh
@@ -13,7 +13,3 @@ pushd python/ufl
 git apply ../../patches/ufl/conditional-uflid.patch
 git apply ../../patches/ufl/0001-Remove-special-case-for-variable-in-ufl2dot.patch
 popd
-
-pushd python/ufl
-git apply ../../patches/ufl/tensor-product-element.patch
-popd
diff --git a/patches/ufl/tensor-product-element.patch b/patches/ufl/tensor-product-element.patch
deleted file mode 100644
index 9fc64f124e95bcbf28391ed5be4c87e1040a4e28..0000000000000000000000000000000000000000
--- a/patches/ufl/tensor-product-element.patch
+++ /dev/null
@@ -1,19 +0,0 @@
-commit f87dcd18d765b0200808b79b2e7374f82a0c6199
-Author: René Heß <rene.hess@iwr.uni-heidelberg.de>
-Date:   Tue Aug 29 14:56:17 2017 +0200
-
-    Patch for TensorProductElements
-
-diff --git a/ufl/algorithms/compute_form_data.py b/ufl/algorithms/compute_form_data.py
-index 3388bbfc..1cef3924 100644
---- a/ufl/algorithms/compute_form_data.py
-+++ b/ufl/algorithms/compute_form_data.py
-@@ -56,7 +56,7 @@ def _auto_select_degree(elements):
-     """
-     # Use max degree of all elements, at least 1 (to work with
-     # Lagrange elements)
--    return max({e.degree() for e in elements} - {None} | {1})
-+    return max({e.degree() if not isinstance(e.degree(), tuple) else max(e.degree()) for e in elements} - {None} | {1})
- 
- 
- def _compute_element_mapping(form):
diff --git a/python/cgen b/python/cgen
index 0062a75a614db6602012b6e926c4b5ced06fcc89..f411383630b272a3a5d3e28b82acaaa530a64723 160000
--- a/python/cgen
+++ b/python/cgen
@@ -1 +1 @@
-Subproject commit 0062a75a614db6602012b6e926c4b5ced06fcc89
+Subproject commit f411383630b272a3a5d3e28b82acaaa530a64723
diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py
index b718c2c632fa2ec92cc907f6af1c0ad3d8464b9c..22a0c317295ae93976c364bd394022447d7e6d43 100644
--- a/python/dune/perftool/loopy/target.py
+++ b/python/dune/perftool/loopy/target.py
@@ -147,7 +147,7 @@ class DuneCExpressionToCodeMapper(CExpressionToCodeMapper):
 
 class DuneASTBuilder(CASTBuilder):
     def function_manglers(self):
-        return CASTBuilder.function_manglers(self) + retrieve_cache_functions("mangler")
+        return retrieve_cache_functions("mangler") + CASTBuilder.function_manglers(self)
 
     def get_expression_to_c_expression_mapper(self, codegen_state):
         return DuneExpressionToCExpressionMapper(codegen_state)
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 68f8e5dbbe0ba67382b4e9a708880beaeebda79f..c89a23cf1f8970213825806804015fc861797ea1 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -514,6 +514,7 @@ def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timin
     from loopy import Options
     opt = Options(ignore_boostable_into=True,
                   check_dep_resolution=False,
+                  enforce_variable_access_ordered="no_check",
                   )
 
     # Create the kernel
@@ -526,6 +527,7 @@ def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timin
                          options=opt,
                          silenced_warnings=silenced,
                          name=name,
+                         lang_version=(2017, 2, 1),
                          )
 
     from loopy import make_reduction_inames_unique
@@ -682,6 +684,9 @@ def cgen_class_from_cache(tag, members=[]):
     signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params))
     constructor = LoopyKernelMethod([signature], constructor_knl, add_timings=False, initializer_list=il)
 
+    from loopy import get_one_scheduled_kernel
+    constructor_knl = get_one_scheduled_kernel(constructor_knl)
+
     # Take any temporary declarations from the kernel and make them class members
     target = DuneTarget()
     from loopy.codegen import CodeGenerationState
diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py
index 5309fa832c7b805d4dc7e3d31213a2a70c0c2087..99107c9cd8f6a4429965985fb8fbbdcbd3e898e9 100644
--- a/python/dune/perftool/sumfact/tabulation.py
+++ b/python/dune/perftool/sumfact/tabulation.py
@@ -402,9 +402,7 @@ def define_theta(name, tabmat, additional_indices=(), width=None):
     bound = tabmat.quadrature_size
     if tabmat.slice_size is not None:
         bound *= tabmat.slice_size
-    qp = name_oned_quadrature_points(bound)
-    qw = name_oned_quadrature_weights(bound)
-    sort_quadrature_points_weights(qp, qw, bound)
+
     degree = tabmat.basis_size - 1
     polynomials = name_polynomials(degree)
 
@@ -433,9 +431,15 @@ def define_theta(name, tabmat, additional_indices=(), width=None):
     if tabmat.slice_size is not None:
         inames[0] = tabmat.slice_size * inames[0] + tabmat.slice_index
 
-    args = [inames[1], prim.Subscript(prim.Variable(qp), (inames[0],))]
-    if tabmat.face is not None:
-        args[1] = tabmat.face
+    args = [inames[1]]
+
+    if tabmat.face is None:
+        qp = name_oned_quadrature_points(bound)
+        qw = name_oned_quadrature_weights(bound)
+        sort_quadrature_points_weights(qp, qw, bound)
+        args.append(prim.Subscript(prim.Variable(qp), (inames[0],)))
+    else:
+        args.append(tabmat.face)
 
     instruction(assignee=prim.Subscript(prim.Variable(name), (i, j) + additional_indices),
                 expression=prim.Call(PolynomialLookup(polynomials, tabmat.derivative), tuple(args)),
diff --git a/python/loopy b/python/loopy
index e4a05746af70ed6e6b7e5b91984f7303fe96f1f4..dedb956bd72a204a685e7aeb7788d1fa55969899 160000
--- a/python/loopy
+++ b/python/loopy
@@ -1 +1 @@
-Subproject commit e4a05746af70ed6e6b7e5b91984f7303fe96f1f4
+Subproject commit dedb956bd72a204a685e7aeb7788d1fa55969899
diff --git a/python/pymbolic b/python/pymbolic
index 915ecb96c1eb60b82973e8cf695e4ffcb622c90a..ffecfaebf21dc8799cd5d007a969e659b255a1e3 160000
--- a/python/pymbolic
+++ b/python/pymbolic
@@ -1 +1 @@
-Subproject commit 915ecb96c1eb60b82973e8cf695e4ffcb622c90a
+Subproject commit ffecfaebf21dc8799cd5d007a969e659b255a1e3
diff --git a/python/pytools b/python/pytools
index e4dd13899c9161ce641c29c55973bfce3df52972..747a1c1fac3fb4f2067f00c1a670f5a7b963b396 160000
--- a/python/pytools
+++ b/python/pytools
@@ -1 +1 @@
-Subproject commit e4dd13899c9161ce641c29c55973bfce3df52972
+Subproject commit 747a1c1fac3fb4f2067f00c1a670f5a7b963b396
diff --git a/python/ufl b/python/ufl
index 962d56f65821fb9c50ca4a5a858882c472243431..5a9593c956fc843eee6ce3a2ae2b9cbc4aec62bf 160000
--- a/python/ufl
+++ b/python/ufl
@@ -1 +1 @@
-Subproject commit 962d56f65821fb9c50ca4a5a858882c472243431
+Subproject commit 5a9593c956fc843eee6ce3a2ae2b9cbc4aec62bf
diff --git a/test/blockstructured/stokes/stokes.mini b/test/blockstructured/stokes/stokes.mini
index a0a261db54fd9432ad5e8cb6d467b41ed06039e2..532a4159b6b6019525acdea50da3c18f14e22f9a 100644
--- a/test/blockstructured/stokes/stokes.mini
+++ b/test/blockstructured/stokes/stokes.mini
@@ -10,7 +10,7 @@ reference = hagenpoiseuille_ref
 extension = vtu
 
 [formcompiler]
-compare_l2errorsquared = 1e-10
+compare_l2errorsquared = 1e-9
 
 [formcompiler.r]
 numerical_jacobian = 0, 1 | expand num