From 55e7cc0ba526b739e2ddb3118057811a314742b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Wed, 30 Aug 2017 10:02:15 +0200
Subject: [PATCH] Anisotropic quadrature order (not for facets!)

Make anisotropic quadrature order possible for non DG examples.
---
 python/dune/perftool/generation/loopy.py      | 17 +++-
 .../loopy/transformations/vectorize_quad.py   |  1 +
 python/dune/perftool/pdelab/quadrature.py     | 44 +++++++--
 python/dune/perftool/pdelab/spaces.py         |  2 +
 python/dune/perftool/sumfact/accumulation.py  | 15 ++-
 python/dune/perftool/sumfact/basis.py         | 33 +++++--
 python/dune/perftool/sumfact/quadrature.py    | 84 ++++++++++------
 python/dune/perftool/sumfact/symbolic.py      |  3 +-
 python/dune/perftool/sumfact/tabulation.py    | 96 ++++++++++++-------
 python/dune/perftool/sumfact/vectorization.py |  1 -
 python/dune/perftool/ufl/visitor.py           |  2 +-
 test/sumfact/poisson/poisson_2d.mini          |  1 +
 12 files changed, 205 insertions(+), 94 deletions(-)

diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py
index 171ba670..fb3dbaba 100644
--- a/python/dune/perftool/generation/loopy.py
+++ b/python/dune/perftool/generation/loopy.py
@@ -50,10 +50,21 @@ def valuearg(name, **kw):
 
 @generator_factory(item_tags=("domain",), context_tags="kernel")
 def domain(iname, shape):
-    if isinstance(iname, tuple):
+    if isinstance(iname, tuple) and isinstance(shape, tuple):
+        assert(len(iname) == len(shape))
+        condition = ""
+        for index, (i, s) in enumerate(zip(iname, shape)):
+            if index > 0:
+                condition += " and "
+            else:
+                condition += " "
+            condition += "0<={}<{}".format(i, s)
         iname = ",".join(iname)
-    if isinstance(shape, str):
-        valuearg(shape)
+        return "{{ [{}] : {} }}".format(iname, condition)
+    elif isinstance(iname, tuple):
+        iname = ",".join(iname)
+        if isinstance(shape, str):
+            valuearg(shape)
     return "{{ [{0}] : 0<={0}<{1} }}".format(iname, shape)
 
 
diff --git a/python/dune/perftool/loopy/transformations/vectorize_quad.py b/python/dune/perftool/loopy/transformations/vectorize_quad.py
index 149002c4..abc72940 100644
--- a/python/dune/perftool/loopy/transformations/vectorize_quad.py
+++ b/python/dune/perftool/loopy/transformations/vectorize_quad.py
@@ -136,6 +136,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix):
             # Do precomputation of the quantity
             prec_quantity = "{}_precomputed".format(quantity)
             prec_quantities.append(prec_quantity)
+
             knl = lp.precompute(knl, subst_name, inames,
                                 temporary_name=prec_quantity,
                                 )
diff --git a/python/dune/perftool/pdelab/quadrature.py b/python/dune/perftool/pdelab/quadrature.py
index 313b6113..6a292420 100644
--- a/python/dune/perftool/pdelab/quadrature.py
+++ b/python/dune/perftool/pdelab/quadrature.py
@@ -189,12 +189,25 @@ def _estimate_quadrature_order():
     # Estimate polynomial degree of integrals of current type (eg 'Cell')
     integral_type = get_global_context_value("integral_type")
     integrals = form.integrals_by_type(integral_type)
-    polynomial_degree = 0
+
+    # Degree could be a tuple (for TensorProductElements)
+    degree = integrals[0].metadata()['estimated_polynomial_degree']
+    if isinstance(degree, int):
+        degree = (degree,)
+    polynomial_degree = [0, ] * len(degree)
+
     for i in integrals:
         degree = i.metadata()['estimated_polynomial_degree']
-        if isinstance(degree, tuple):
-            degree = max(degree)
-        polynomial_degree = max(polynomial_degree, degree)
+        if isinstance(degree, int):
+            degree = [degree, ]
+        assert(len(polynomial_degree) == len(degree))
+        for i in range(len(polynomial_degree)):
+            polynomial_degree[i] = max(polynomial_degree[i], degree[i])
+
+    # Return either a tuple or an int
+    polynomial_degree = tuple(polynomial_degree)
+    if len(polynomial_degree) == 1:
+        polynomial_degree = polynomial_degree[0]
 
     return polynomial_degree
 
@@ -202,14 +215,31 @@ def _estimate_quadrature_order():
 def quadrature_order():
     """Get quadrature order
 
-    Note: In PDELab quadrature order m means that integration of
-    polynomials of degree m is excat.
+    Notes:
+
+    - In PDELab quadrature order m means that integration of
+      polynomials of degree m is excat.
+
+    - If you sue sum factorization and TensorProductElement it is
+      possible to use a different quadrature_order per direction.
     """
     if get_option("quadrature_order"):
-        quadrature_order = int(get_option("quadrature_order"))
+        quadrature_order = tuple(map(int, get_option("quadrature_order").split(',')))
     else:
         quadrature_order = _estimate_quadrature_order()
 
+    # TensorProductElements can have different quadrature order for
+    # every direction so quadrature_order may be a tuple. If we do not
+    # use TensorProductElements we want to return an int.
+    if isinstance(quadrature_order, tuple):
+        if len(quadrature_order) == 1:
+            quadrature_order = quadrature_order[0]
+    if isinstance(quadrature_order, tuple):
+        if not get_option('sumfact'):
+            raise NotImplementedError("Different quadrature order per direction is only implemented for kernels using sum factorization.")
+        from dune.perftool.pdelab.geometry import world_dimension
+        assert(len(quadrature_order) == world_dimension())
+
     return quadrature_order
 
 
diff --git a/python/dune/perftool/pdelab/spaces.py b/python/dune/perftool/pdelab/spaces.py
index c9883dd6..706be5b9 100644
--- a/python/dune/perftool/pdelab/spaces.py
+++ b/python/dune/perftool/pdelab/spaces.py
@@ -6,6 +6,7 @@ from dune.perftool.generation import (class_member,
                                       generator_factory,
                                       include_file,
                                       preamble,
+                                      valuearg,
                                       )
 from dune.perftool.pdelab.restriction import restricted_name
 from dune.perftool.ufl.modified_terminals import Restriction
@@ -73,6 +74,7 @@ def define_lfs(name, father, child):
 @preamble
 def define_lfs_size(lfs, element, restriction):
     name = name_lfs_bound(lfs)
+    valuearg(name, dtype=numpy.int32)
     return "auto {} = {}.size();".format(name, lfs)
 
 
diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index 3559bdfc..1dd62bf2 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -43,6 +43,7 @@ import pymbolic.primitives as prim
 import ufl.classes as uc
 from ufl import FiniteElement, TensorProductElement
 
+
 @iname
 def _sumfact_iname(bound, _type, count):
     name = "sf_{}_{}".format(_type, str(count))
@@ -198,13 +199,17 @@ def generate_accumulation_instruction(expr, visitor):
     test_info = visitor.test_info
     trial_info = visitor.trial_info
 
+    # Number of basis functions per direction
     leaf_element = test_info.element
     if leaf_element.num_sub_elements() > 0 and not isinstance(leaf_element, TensorProductElement):
         leaf_element = leaf_element.extract_component(test_info.element_index)[1]
     degree = leaf_element._degree
-    if isinstance(degree, tuple):
-        degree = max(degree)
-    basis_size = degree + 1
+    if isinstance(degree, int):
+        degree = (degree,) * dim
+    basis_size = tuple(deg + 1 for deg in degree)
+
+    # Anisotropic finite elements are not (yet) supported by Dune
+    assert(size == basis_size[0] for size in basis_size)
 
     from dune.perftool.pdelab.localoperator import boundary_predicates
     predicates = boundary_predicates(expr,
@@ -318,7 +323,7 @@ def generate_accumulation_instruction(expr, visitor):
 
     # Collect the lfs and lfs indices for the accumulate call
     test_lfs.index = flatten_index(tuple(prim.Variable(i) for i in inames),
-                                   (basis_size,) * dim,
+                                   basis_size,
                                    order="f"
                                    )
 
@@ -329,7 +334,7 @@ def generate_accumulation_instruction(expr, visitor):
         from dune.perftool.sumfact.basis import _basis_functions_per_direction
         ansatz_lfs.index = flatten_index(tuple(prim.Variable(jacobian_inames[i])
                                                for i in range(world_dimension())),
-                                         (_basis_functions_per_direction(trial_leaf_element),) * dim,
+                                         _basis_functions_per_direction(trial_leaf_element),
                                          order="f"
                                          )
 
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index d117dc5b..bdb5badd 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -99,9 +99,15 @@ def _basis_functions_per_direction(element):
     from ufl import FiniteElement, TensorProductElement
     assert isinstance(element, (FiniteElement, TensorProductElement))
     degree = element.degree()
-    if isinstance(degree, tuple):
-        degree = max(degree)
-    return degree + 1
+    if isinstance(degree, int):
+        degree = (degree,) * world_dimension()
+
+    basis_size = tuple(deg + 1 for deg in degree)
+
+    # Anisotropic finite elements are not (yet) supported by Dune
+    assert(size == basis_size[0] for size in basis_size)
+
+    return basis_size
 
 
 @kernel_cached
@@ -185,6 +191,7 @@ def pymbolic_coefficient(element, restriction, index, coeff_func, visitor_indice
 
 @iname
 def sumfact_lfs_iname(element, bound, dim):
+    assert(isinstance(bound, int))
     from dune.perftool.pdelab.driver import FEM_name_mangling
     name = "sumfac_lfs_{}_{}".format(FEM_name_mangling(element), dim)
     domain(name, bound)
@@ -199,7 +206,7 @@ def lfs_inames(element, restriction, number=1, context=''):
         return ()
     else:
         dim = world_dimension()
-        return tuple(sumfact_lfs_iname(element, _basis_functions_per_direction(element), d) for d in range(dim))
+        return tuple(sumfact_lfs_iname(element, _basis_functions_per_direction(element)[d], d) for d in range(dim))
 
 
 @backend(interface="evaluate_basis", name="sumfact")
@@ -212,10 +219,18 @@ def evaluate_basis(element, name, restriction):
 
     # Collect the pairs of lfs/quad inames that are in use
     # On facets, the normal direction of the facet is excluded
-    tab = BasisTabulationMatrix(basis_size=_basis_functions_per_direction(element))
-    prod = tuple(tab.pymbolic((prim.Variable(i), prim.Variable(j)))
-                 for (i, j) in zip(quad_inames, tuple(iname for i, iname in enumerate(inames) if i != facedir))
-                 )
+    #
+    # If facedir is not none, the length of inames and quad_inames is
+    # different. For inames we want to skip the facedir direction, for
+    # quad_inames we need all entries. Thats the reason for the
+    # help_index.
+    basis_per_dir = _basis_functions_per_direction(element)
+    prod = ()
+    help_index = 0
+    for direction in range(len(inames)):
+        if direction != facedir:
+            prod = prod + (BasisTabulationMatrix(basis_size=basis_per_dir[direction]).pymbolic((prim.Variable(quad_inames[help_index]), prim.Variable(inames[direction]))),)
+            help_index += 1
 
     # Add the missing direction on facedirs by evaluating at either 0 or 1
     if facedir is not None:
@@ -266,7 +281,7 @@ def evaluate_reference_gradient(element, name, restriction, index):
     prod = []
     for i in range(dim):
         if i != facedir:
-            tab = BasisTabulationMatrix(basis_size=_basis_functions_per_direction(element),
+            tab = BasisTabulationMatrix(basis_size=_basis_functions_per_direction(element)[i],
                                         derivative=index == i)
             prod.append(tab.pymbolic((prim.Variable(quadinamemapping[i]), prim.Variable(inames[i]))))
 
diff --git a/python/dune/perftool/sumfact/quadrature.py b/python/dune/perftool/sumfact/quadrature.py
index a5ede367..d42296f6 100644
--- a/python/dune/perftool/sumfact/quadrature.py
+++ b/python/dune/perftool/sumfact/quadrature.py
@@ -10,13 +10,16 @@ from dune.perftool.generation import (backend,
                                       )
 
 from dune.perftool.sumfact.tabulation import (quadrature_points_per_direction,
+                                              local_quadrature_points_per_direction,
                                               name_oned_quadrature_points,
                                               name_oned_quadrature_weights,
                                               )
 from dune.perftool.pdelab.argument import name_accumulation_variable
 from dune.perftool.pdelab.geometry import (local_dimension,
+                                           world_dimension,
                                            )
 from dune.perftool.options import get_option
+from dune.perftool.sumfact.switch import get_facedir
 
 from loopy import CallMangleInfo
 from loopy.symbolic import FunctionIdentifier
@@ -80,52 +83,59 @@ def quadrature_inames(element):
     if element is None:
         names = tuple("quad_{}".format(d) for d in range(local_dimension()))
     else:
-        from ufl import FiniteElement
-        assert isinstance(element, FiniteElement)
+        from ufl import FiniteElement, TensorProductElement
+        assert isinstance(element, (FiniteElement, TensorProductElement))
         from dune.perftool.pdelab.driver import FEM_name_mangling
         names = tuple("quad_{}_{}".format(FEM_name_mangling(element), d) for d in range(local_dimension()))
-    domain(names, quadrature_points_per_direction())
+
+    local_qps_per_dir = local_quadrature_points_per_direction()
+    domain(names, local_qps_per_dir)
     return names
 
 
 @iname(kernel="operator")
 def constructor_quad_iname(name, d, bound):
-    name = "{}_{}".format(name, d)
-    domain(name, quadrature_points_per_direction(), kernel="operator")
+    name = "{}_localdim{}".format(name, d)
+    domain(name, bound, kernel="operator")
     return name
 
 
-def constructor_quadrature_inames(dim, num1d):
-    name = "quadiname_dim{}_num{}".format(dim, num1d)
-    return tuple(constructor_quad_iname(name, d, quadrature_points_per_direction()) for d in range(local_dimension()))
+def constructor_quadrature_inames(dim):
+    local_qps_per_dir = local_quadrature_points_per_direction()
+    local_qps_per_dir_str = '_'.join(map(str, local_qps_per_dir))
+    name = "quadiname_dim{}_num{}".format(dim, local_qps_per_dir_str)
+    return tuple(constructor_quad_iname(name, d, local_qps_per_dir[d]) for d in range(local_dimension()))
 
 
-def define_recursive_quadrature_weight(visitor, name, dir):
+def define_recursive_quadrature_weight(visitor, name, direction):
     info = visitor.current_info[1]
     if info is None:
         element = None
     else:
         element = info.element
-    iname = quadrature_inames(element)[dir]
+    iname = quadrature_inames(element)[direction]
     temporary_variable(name, shape=(), shape_impl=())
-    instruction(expression=Product((recursive_quadrature_weight(dir + 1),
-                                    Subscript(Variable(name_oned_quadrature_weights()),
+
+    qps_per_dir = quadrature_points_per_direction()
+    qp_bound = qps_per_dir[direction]
+    instruction(expression=Product((recursive_quadrature_weight(direction + 1),
+                                    Subscript(Variable(name_oned_quadrature_weights(qp_bound)),
                                               (Variable(iname),)
                                               ))
                                    ),
                 assignee=Variable(name),
-                forced_iname_deps=frozenset(quadrature_inames()[dir:]),
+                forced_iname_deps=frozenset(quadrature_inames()[direction:]),
                 forced_iname_deps_is_final=True,
                 tags=frozenset({"quad"}),
                 )
 
 
-def recursive_quadrature_weight(visitor, dir=0):
-    if dir == local_dimension():
+def recursive_quadrature_weight(visitor, direction=0):
+    if direction == local_dimension():
         return pymbolic_base_weight()
     else:
-        name = 'weight_{}'.format(dir)
-        define_recursive_quadrature_weight(visitor, name, dir)
+        name = 'weight_{}'.format(direction)
+        define_recursive_quadrature_weight(visitor, name, direction)
         return Variable(name)
 
 
@@ -134,14 +144,16 @@ def quadrature_weight(visitor):
     if not get_option("precompute_quadrature_info"):
         return recursive_quadrature_weight(visitor)
 
+    # Quadrature points per (local) direction
     dim = local_dimension()
-    num1d = quadrature_points_per_direction()
-    name = "quad_weights_dim{}_num{}".format(dim, num1d)
+    local_qps_per_dir = local_quadrature_points_per_direction()
+    local_qps_per_dir_str = '_'.join(map(str, local_qps_per_dir))
+    name = "quad_weights_dim{}_num{}".format(dim, local_qps_per_dir_str)
 
     # Add a class member
     loopy_class_member(name,
                        dtype=np.float64,
-                       shape=(num1d,) * dim,
+                       shape=local_qps_per_dir,
                        classtag="operator",
                        dim_tags=",".join(["f"] * dim),
                        managed=True,
@@ -149,9 +161,12 @@ def quadrature_weight(visitor):
                        )
 
     # Precompute it in the constructor
-    inames = constructor_quadrature_inames(dim, num1d)
-    instruction(assignee=prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames)),
-                expression=prim.Product(tuple(Subscript(Variable(name_oned_quadrature_weights()), (prim.Variable(i),)) for i in inames)),
+    inames = constructor_quadrature_inames(dim)
+    assignee = prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames))
+    expression = prim.Product(tuple(Subscript(Variable(name_oned_quadrature_weights(local_qps_per_dir[index])),
+                                              (prim.Variable(iname),)) for index, iname in enumerate(inames)))
+    instruction(assignee=assignee,
+                expression=expression,
                 within_inames=frozenset(inames),
                 kernel="operator",
                 )
@@ -165,7 +180,11 @@ def quadrature_weight(visitor):
 
 
 def define_quadrature_position(name, index):
-    instruction(expression=Subscript(Variable(name_oned_quadrature_points()), (Variable(quadrature_inames()[index]),)),
+    qps_per_dir = quadrature_points_per_direction()
+    qp_bound = qps_per_dir[index]
+    expression = Subscript(Variable(name_oned_quadrature_points(qp_bound)),
+                           (Variable(quadrature_inames()[index]),))
+    instruction(expression=expression,
                 assignee=Subscript(Variable(name), (index,)),
                 forced_iname_deps=frozenset(quadrature_inames(element)),
                 forced_iname_deps_is_final=True,
@@ -184,12 +203,14 @@ def pymbolic_quadrature_position(index, visitor):
 
     assert isinstance(index, int)
     dim = local_dimension()
-    num1d = quadrature_points_per_direction()
-    name = "quad_points_dim{}_num{}_dir{}".format(dim, num1d, index)
+    qps_per_dir = quadrature_points_per_direction()
+    local_qps_per_dir = local_quadrature_points_per_direction()
+    local_qps_per_dir_str = '_'.join(map(str, local_qps_per_dir))
+    name = "quad_points_dim{}_num{}_dir{}".format(dim, local_qps_per_dir_str, index)
 
     loopy_class_member(name,
                        dtype=np.float64,
-                       shape=(num1d,) * dim,
+                       shape=local_qps_per_dir,
                        classtag="operator",
                        dim_tags=",".join(["f"] * dim),
                        managed=True,
@@ -197,9 +218,12 @@ def pymbolic_quadrature_position(index, visitor):
                        )
 
     # Precompute it in the constructor
-    inames = constructor_quadrature_inames(dim, num1d)
-    instruction(assignee=prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames)),
-                expression=Subscript(Variable(name_oned_quadrature_points()), (prim.Variable(inames[index]))),
+    inames = constructor_quadrature_inames(dim)
+    assignee = prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames))
+    expression = Subscript(Variable(name_oned_quadrature_points(qps_per_dir[index])),
+                           (prim.Variable(inames[index])))
+    instruction(assignee=assignee,
+                expression=expression,
                 within_inames=frozenset(inames),
                 kernel="operator",
                 )
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 288374da..a0cc1054 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -4,7 +4,7 @@ from dune.perftool.options import get_option
 from dune.perftool.generation import get_counted_variable
 from dune.perftool.pdelab.geometry import local_dimension, world_dimension
 from dune.perftool.sumfact.quadrature import quadrature_inames
-from dune.perftool.sumfact.tabulation import BasisTabulationMatrixArray
+from dune.perftool.sumfact.tabulation import BasisTabulationMatrixBase, BasisTabulationMatrixArray
 
 from pytools import ImmutableRecord, product
 
@@ -107,7 +107,6 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
         """
         # Assert the inputs!
         assert isinstance(matrix_sequence, tuple)
-        from dune.perftool.sumfact.tabulation import BasisTabulationMatrixBase
         assert all(isinstance(m, BasisTabulationMatrixBase) for m in matrix_sequence)
 
         assert stage in (1, 3)
diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py
index 736f64b2..9624f1a9 100644
--- a/python/dune/perftool/sumfact/tabulation.py
+++ b/python/dune/perftool/sumfact/tabulation.py
@@ -3,7 +3,7 @@ from dune.perftool.ufl.modified_terminals import Restriction
 from dune.perftool.options import get_option
 
 from dune.perftool.pdelab.argument import name_coefficientcontainer
-from dune.perftool.pdelab.geometry import world_dimension
+from dune.perftool.pdelab.geometry import world_dimension, local_dimension
 from dune.perftool.generation import (class_member,
                                       domain,
                                       function_mangler,
@@ -51,11 +51,13 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord):
                  slice_size=None,
                  slice_index=None,
                  ):
+        assert(isinstance(basis_size, int))
         if quadrature_size is None:
             quadrature_size = quadrature_points_per_direction()
+            assert(qp == quadrature_size[0] for qp in quadrature_size)
+            quadrature_size = quadrature_size[0]
         if slice_size is not None:
             quadrature_size = ceildiv(quadrature_size, slice_size)
-
         ImmutableRecord.__init__(self,
                                  quadrature_size=quadrature_size,
                                  basis_size=basis_size,
@@ -81,13 +83,14 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord):
             return self.basis_size
 
     def pymbolic(self, indices):
-        name = "{}{}Theta{}{}_qp{}_dof{}".format("face{}_".format(self.face) if self.face is not None else "",
-                                                 "d" if self.derivative else "",
-                                                 "T" if self.transpose else "",
-                                                 "_slice{}".format(self.slice_index) if self.slice_size is not None else "",
-                                                 self.quadrature_size,
-                                                 self.basis_size,
-                                                 )
+        name = "{}{}Theta{}{}_qp{}_dof{}" \
+               .format("face{}_".format(self.face) if self.face is not None else "",
+                       "d" if self.derivative else "",
+                       "T" if self.transpose else "",
+                       "_slice{}".format(self.slice_index) if self.slice_size is not None else "",
+                       self.quadrature_size,
+                       self.basis_size,
+                       )
         define_theta(name, self)
         return prim.Subscript(prim.Variable(name), indices)
 
@@ -199,51 +202,68 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase):
 
 
 def quadrature_points_per_direction():
-    # Quadrature order
+    # Quadrature order per direction
     q = quadrature_order()
+    if isinstance(q, int):
+        q = (q,) * world_dimension()
 
     # Quadrature points in per direction
-    nb_qp = q // 2 + 1
+    nb_qp = tuple([(lambda x: x // 2 + 1)(order) for order in q])
 
     return nb_qp
 
 
+def local_quadrature_points_per_direction():
+    """On a volume this simply gives the number of quadrature points per
+    direction. On a face it only returns the worlddim - 1 number of
+    quadrature points belonging to this face. (Delete normal direction).
+    """
+    qps_per_dir = quadrature_points_per_direction()
+    if local_dimension() != world_dimension():
+        facedir = get_global_context_value('facedir_s')
+        assert(get_global_context_value('facedir_s') == get_global_context_value('facedir_n') or
+               get_global_context_value('integral_type') == 'exterior_facet')
+        qps_per_dir = qps_per_dir[:facedir] + qps_per_dir[facedir + 1:]
+    return qps_per_dir
+
+
 def polynomial_degree():
     form = get_global_context_value("formdata").preprocessed_form
-    return form.coefficients()[0].ufl_element()._degree
+    degree = form.coefficients()[0].ufl_element()._degree
+    if isinstance(degree, int):
+        degree = (degree,) * world_dimension()
+    return degree
 
 
 def basis_functions_per_direction():
-    return polynomial_degree() + 1
+    return tuple(degree + 1 for degree in polynomial_degree())
 
 
-def define_oned_quadrature_weights(name):
+def define_oned_quadrature_weights(name, bound):
     loopy_class_member(name,
                        dtype=np.float64,
                        classtag="operator",
-                       shape=(quadrature_points_per_direction(),),
+                       shape=(bound,),
                        )
 
 
-def name_oned_quadrature_weights():
-    num = quadrature_points_per_direction()
-    name = "qw_num{}".format(num)
-    define_oned_quadrature_weights(name)
+def name_oned_quadrature_weights(bound):
+    name = "qw_num{}".format(bound)
+    define_oned_quadrature_weights(name, bound)
     return name
 
 
-def define_oned_quadrature_points(name):
+def define_oned_quadrature_points(name, bound):
     loopy_class_member(name,
                        dtype=np.float64,
                        classtag="operator",
-                       shape=(quadrature_points_per_direction(),),
+                       shape=(bound,),
                        )
 
 
-def name_oned_quadrature_points():
-    num = quadrature_points_per_direction()
-    name = "qp_num{}".format(num)
-    define_oned_quadrature_points(name)
+def name_oned_quadrature_points(bound):
+    name = "qp_num{}".format(bound)
+    define_oned_quadrature_points(name, bound)
     return name
 
 
@@ -284,12 +304,12 @@ def name_polynomials(degree):
 
 
 @preamble(kernel="operator")
-def sort_quadrature_points_weights(qp, qw):
+def sort_quadrature_points_weights(qp, qw, bound):
     range_field = lop_template_range_field()
     domain_field = name_domain_field()
-    number_qp = quadrature_points_per_direction()
     include_file("dune/perftool/sumfact/onedquadrature.hh", filetag="operatorfile")
-    return "onedQuadraturePointsWeights<{}, {}, {}>({}, {});".format(range_field, domain_field, number_qp, qp, qw)
+    return "onedQuadraturePointsWeights<{}, {}, {}>({}, {});" \
+        .format(range_field, domain_field, bound, qp, qw)
 
 
 @iname(kernel="operator")
@@ -325,9 +345,12 @@ def polynomial_lookup_mangler(target, func, dtypes):
 
 def define_theta(name, tabmat, additional_indices=(), width=None):
     assert isinstance(tabmat, BasisTabulationMatrix)
-    qp = name_oned_quadrature_points()
-    qw = name_oned_quadrature_weights()
-    sort_quadrature_points_weights(qp, qw)
+    bound = tabmat.quadrature_size
+    if tabmat.slice_size is not None:
+        bound *= tabmat.slice_size
+    qp = name_oned_quadrature_points(bound)
+    qw = name_oned_quadrature_weights(bound)
+    sort_quadrature_points_weights(qp, qw, bound)
     degree = tabmat.basis_size - 1
     polynomials = name_polynomials(degree)
 
@@ -370,17 +393,18 @@ def define_theta(name, tabmat, additional_indices=(), width=None):
 def construct_basis_matrix_sequence(transpose=False, derivative=None, facedir=None, facemod=None, basis_size=None):
     dim = world_dimension()
     result = [None] * dim
+    quadrature_size = quadrature_points_per_direction()
+    assert (basis_size is not None)
+    if facedir is not None:
+        quadrature_size = quadrature_size[:facedir] + (1,) + quadrature_size[facedir:]
 
     for i in range(dim):
-        quadrature_size = quadrature_points_per_direction()
-
         onface = None
         if facedir == i:
-            quadrature_size = 1
             onface = facemod
 
-        result[i] = BasisTabulationMatrix(quadrature_size=quadrature_size,
-                                          basis_size=basis_size,
+        result[i] = BasisTabulationMatrix(quadrature_size=quadrature_size[i],
+                                          basis_size=basis_size[i],
                                           transpose=transpose,
                                           derivative=derivative == i,
                                           face=onface)
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 790fa59a..d5e85529 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -9,7 +9,6 @@ from dune.perftool.generation import (generator_factory,
 from dune.perftool.pdelab.restriction import (Restriction,
                                               restricted_name,
                                               )
-from dune.perftool.sumfact.tabulation import BasisTabulationMatrixArray
 from dune.perftool.error import PerftoolError
 from dune.perftool.options import get_option
 
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index 7086be2d..9b36e9cc 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -99,7 +99,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
         leaf_element = o.ufl_element()
 
         # Select the correct leaf element in the case of this being a mixed finite element
-        if o.ufl_element().num_sub_elements() > 0 and not isinstance(o.ufl_element(),TensorProductElement):
+        if o.ufl_element().num_sub_elements() > 0 and not isinstance(o.ufl_element(), TensorProductElement):
             index = self.indices[0]
             assert isinstance(index, int)
             self.indices = self.indices[1:]
diff --git a/test/sumfact/poisson/poisson_2d.mini b/test/sumfact/poisson/poisson_2d.mini
index ea689468..96a863df 100644
--- a/test/sumfact/poisson/poisson_2d.mini
+++ b/test/sumfact/poisson/poisson_2d.mini
@@ -18,6 +18,7 @@ numerical_jacobian = 1, 0 | expand num
 compare_l2errorsquared = 4e-5, 4e-9 | expand deg
 sumfact = 1
 vectorize_grads = 1, 0 | expand grad
+# quadrature_order = 2, 4
 
 [formcompiler.ufl_variants]
 degree = 1, 2 | expand deg
-- 
GitLab