diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt
index 5dd2d517990a94ddc4e390671732479f4c517c5f..d334abb756adf52eb3186eb05deb088331c21d89 100644
--- a/bin/CMakeLists.txt
+++ b/bin/CMakeLists.txt
@@ -2,4 +2,6 @@ dune_python_install_package(PATH .)
 
 dune_symlink_to_source_files(FILES make_graph.sh
                                    knltimings.sh
-                             )
+                                   )
+
+add_subdirectory(analyzegrid)
diff --git a/bin/analyzegrid/CMakeLists.txt b/bin/analyzegrid/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2e96704bb1c34086a54e4ed7d537804b578f10f4
--- /dev/null
+++ b/bin/analyzegrid/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(analyze_grid analyze_grid.cc)
+dune_symlink_to_source_files(FILES test_2d_structured.ini test_3d_structured.ini test_2d_unstructured.ini test_3d_unstructured.ini)
diff --git a/bin/analyzegrid/analyze_grid.cc b/bin/analyzegrid/analyze_grid.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5ceed213057f56917deafd37cc0f9ccf3722f703
--- /dev/null
+++ b/bin/analyzegrid/analyze_grid.cc
@@ -0,0 +1,151 @@
+#include "config.h"
+
+#include <algorithm>
+
+#include <dune/common/parallel/mpihelper.hh>
+#include <dune/common/parametertree.hh>
+#include "dune/common/parametertreeparser.hh"
+#include <dune/grid/uggrid.hh>
+#include <dune/grid/yaspgrid.hh>
+#include <dune/pdelab/gridfunctionspace/gridfunctionspace.hh>
+#include <dune/testtools/gridconstruction.hh>
+
+#include <dune/consistent-edge-orientation/createconsistentgrid.hh>
+
+#include <dune/perftool/sumfact/analyzegrid.hh>
+
+int main(int argc, char** argv){
+  try
+  {
+    if (argc != 3){
+      std::cout << "Need ini file and output filename." << std::endl;
+      return 1;
+    }
+
+    // Initialize basic stuff...
+    Dune::MPIHelper& mpihelper = Dune::MPIHelper::instance(argc, argv);
+    using RangeType = double;
+    Dune::ParameterTree initree;
+    Dune::ParameterTreeParser::readINITree(argv[1], initree);
+
+    bool quadrilateral = false;
+    std::string elementType = initree.get("elementType", "");
+    if (elementType == "quadrilateral"){
+      quadrilateral = true;
+    }
+    quadrilateral = quadrilateral || initree.hasKey("cells");
+    std::cout << "quadrilateral: " << quadrilateral << std::endl;
+
+    bool unstructured = initree.get("formcompiler.grid_unstructured", false);
+    std::cout << "unstructured: " << unstructured << std::endl;
+
+    std::string filename = std::string(argv[2]);
+    std::cout << "writing into: " << filename << std::endl;
+
+    if (quadrilateral){
+      if (unstructured){
+        std::vector<int> tmp;
+        std::vector<int> dim_counter = initree.get("elements", tmp);
+        const int dim = dim_counter.size();
+        std::cout << "dim: " << dim << std::endl;
+
+        if (dim == 2){
+          std::cout << "Analyse 2d unstructured quadrilateral grid" << std::endl;
+
+          // Setup grid (view)...
+          using Grid = Dune::UGGrid<2>;
+          using GV = Grid::LeafGridView;
+          IniGridFactory<Grid> factory(initree);
+          std::shared_ptr<Grid> grid_nonconsistent = factory.getGrid();
+          std::shared_ptr<Grid> grid = createConsistentGrid(grid_nonconsistent);
+          GV gv = grid->leafGridView();
+
+          // Extract facemod/facedir intersection variation
+          using ES = Dune::PDELab::AllEntitySet<GV>;
+          ES es(gv);
+          analyze_grid(es, filename);
+        }
+        else if (dim == 3){
+          std::cout << "Analyse 3d unstructured quadrilateral grid" << std::endl;
+
+          // Setup grid (view)...
+          using Grid = Dune::UGGrid<3>;
+          using GV = Grid::LeafGridView;
+          IniGridFactory<Grid> factory(initree);
+          std::shared_ptr<Grid> grid_nonconsistent = factory.getGrid();
+          std::shared_ptr<Grid> grid = createConsistentGrid(grid_nonconsistent);
+          GV gv = grid->leafGridView();
+
+          // Extract facemod/facedir intersection variation
+          using ES = Dune::PDELab::AllEntitySet<GV>;
+          ES es(gv);
+          analyze_grid(es, filename);
+        }
+        else{
+          assert(false);
+        }
+      }
+      else {
+        std::vector<int> tmp;
+        std::vector<int> dim_counter = initree.get("cells", tmp);
+        const int dim = dim_counter.size();
+        std::cout << "dim: " << dim << std::endl;
+
+        if (dim == 2){
+          std::cout << "Analyse 2d structured quadrilateral grid" << std::endl;
+
+          // For structured grids we already know the variation beforehand. This
+          // is only implemented to cover all the cases.
+
+          // Setup grid (view)...
+          using Grid = Dune::YaspGrid<2>;
+          using GV = Grid::LeafGridView;
+          IniGridFactory<Grid> factory(initree);
+          std::shared_ptr<Grid> grid = factory.getGrid();
+          GV gv = grid->leafGridView();
+
+          // Extract facemod/facedir intersection variation
+          using ES = Dune::PDELab::AllEntitySet<GV>;
+          ES es(gv);
+          analyze_grid(es, filename);
+        }
+        if (dim == 3){
+          std::cout << "Analyse 3d structured quadrilateral grid" << std::endl;
+
+          // For structured grids we already know the variation beforehand. This
+          // is only implemented to cover all the cases.
+
+          // Setup grid (view)...
+          using Grid = Dune::YaspGrid<3>;
+          using GV = Grid::LeafGridView;
+          IniGridFactory<Grid> factory(initree);
+          std::shared_ptr<Grid> grid = factory.getGrid();
+          GV gv = grid->leafGridView();
+
+          // Extract facemod/facedir intersection variation
+          using ES = Dune::PDELab::AllEntitySet<GV>;
+          ES es(gv);
+          analyze_grid(es, filename);
+        }
+        else{
+          assert (false);
+        }
+      }
+    }
+    else{
+      // We can't do sum factorization on simplicial meshes
+      assert (false);
+    }
+
+    return 0;
+
+  }
+  catch (Dune::Exception& e)
+  {    std::cerr << "Dune reported error: " << e << std::endl;
+    return 1;
+  }
+  catch (std::exception& e)
+  {    std::cerr << "Unknown exception thrown!" << std::endl;
+    return 1;
+  }
+}
diff --git a/bin/analyzegrid/test_2d_structured.ini b/bin/analyzegrid/test_2d_structured.ini
new file mode 100644
index 0000000000000000000000000000000000000000..327827a8d79439a4f6988e1fe0bedda658a1be5c
--- /dev/null
+++ b/bin/analyzegrid/test_2d_structured.ini
@@ -0,0 +1,24 @@
+cells = 16 16
+deg_suffix = deg2
+diff_suffix = symdiff
+extension = 1. 1.
+gradvec_suffix = nongradvec
+quadvec_suffix = nonquadvec
+
+[formcompiler]
+compare_l2errorsquared = 5e-7
+
+[formcompiler.r]
+numerical_jacobian = 0
+sumfact = 1
+vectorization_quadloop = 0
+vectorization_strategy = none
+
+[formcompiler.ufl_variants]
+degree = 2
+
+[wrapper]
+
+[wrapper.vtkcompare]
+extension = vtu
+name = sumfact_poisson_dg_2d_deg2_symdiff_nonquadvec_nongradvec
diff --git a/bin/analyzegrid/test_2d_unstructured.ini b/bin/analyzegrid/test_2d_unstructured.ini
new file mode 100644
index 0000000000000000000000000000000000000000..c8f4c25aabe980939b9740877a7af4cc0797dbdb
--- /dev/null
+++ b/bin/analyzegrid/test_2d_unstructured.ini
@@ -0,0 +1,27 @@
+deg_suffix = deg2
+diff_suffix = symdiff
+elementType = quadrilateral
+elements = 16 16
+gradvec_suffix = nongradvec
+lowerleft = 0.0 0.0
+quadvec_suffix = nonquadvec
+upperright = 1.0 1.0
+
+[formcompiler]
+compare_l2errorsquared = 5e-5
+grid_unstructured = 1
+
+[formcompiler.r]
+numerical_jacobian = 0
+sumfact = 1
+vectorization_quadloop = 0
+vectorization_strategy = none
+
+[formcompiler.ufl_variants]
+degree = 2
+
+[wrapper]
+
+[wrapper.vtkcompare]
+extension = vtu
+name = sumfact_poisson_dg_2d_unstructured_deg2_symdiff_nonquadvec_nongradvec
diff --git a/bin/analyzegrid/test_3d_structured.ini b/bin/analyzegrid/test_3d_structured.ini
new file mode 100644
index 0000000000000000000000000000000000000000..83fc69b5d0ba1b39dec597c719ea5564d9352eee
--- /dev/null
+++ b/bin/analyzegrid/test_3d_structured.ini
@@ -0,0 +1,24 @@
+cells = 8 8 8
+deg_suffix = deg2
+diff_suffix = symdiff
+extension = 1. 1. 1.
+gradvec_suffix = nongradvec
+quadvec_suffix = nonquadvec
+
+[formcompiler]
+compare_l2errorsquared = 5e-6
+
+[formcompiler.r]
+numerical_jacobian = 0
+sumfact = 1
+vectorization_quadloop = 0
+vectorization_strategy = none
+
+[formcompiler.ufl_variants]
+degree = 2
+
+[wrapper]
+
+[wrapper.vtkcompare]
+extension = vtu
+name = sumfact_poisson_dg_3d_deg2_symdiff_nonquadvec_nongradvec
diff --git a/bin/analyzegrid/test_3d_unstructured.ini b/bin/analyzegrid/test_3d_unstructured.ini
new file mode 100644
index 0000000000000000000000000000000000000000..9f4e53b240eb5475a2368ef241df1348f3031540
--- /dev/null
+++ b/bin/analyzegrid/test_3d_unstructured.ini
@@ -0,0 +1,28 @@
+deg_suffix = deg2
+diff_suffix = symdiff
+elementType = quadrilateral
+elements = 8 8 8
+gradvec_suffix = nongradvec
+lowerleft = 0.0 0.0 0.0
+quadvec_suffix = nonquadvec
+upperright = 1.0 1.0 1.0
+
+[formcompiler]
+compare_l2errorsquared = 5e-6
+grid_unstructured = 1
+
+[formcompiler.r]
+numerical_jacobian = 0
+sumfact = 1
+sumfact_regular_jacobians = 1
+vectorization_quadloop = 0
+vectorization_strategy = none
+
+[formcompiler.ufl_variants]
+degree = 2
+
+[wrapper]
+
+[wrapper.vtkcompare]
+extension = vtu
+name = sumfact_poisson_dg_3d_unstructured_deg2_symdiff_nonquadvec_nongradvec
diff --git a/cmake/modules/DunePerftoolMacros.cmake b/cmake/modules/DunePerftoolMacros.cmake
index ca33bfae93d7bcedc198d864518f424944924efb..d5557d410333edfe2a292c9ceb668195012440ea 100644
--- a/cmake/modules/DunePerftoolMacros.cmake
+++ b/cmake/modules/DunePerftoolMacros.cmake
@@ -71,7 +71,7 @@ endif()
 file(GLOB_RECURSE UFL2PDELAB_SOURCES ${UFL2PDELAB_GLOB_PATTERN})
 
 function(add_generated_executable)
-  set(OPTIONS EXCLUDE_FROM_ALL)
+  set(OPTIONS EXCLUDE_FROM_ALL ANALYZE_GRID)
   set(SINGLE TARGET SOURCE UFLFILE INIFILE)
   set(MULTI FORM_COMPILER_ARGS DEPENDS)
   include(CMakeParseArguments)
@@ -115,6 +115,18 @@ function(add_generated_executable)
     set(GEN_EXCLUDE_FROM_ALL "")
   endif()
 
+  # Process analyze grid option
+  set(ANALYZE_GRID_FILE)
+  set(ANALYZE_GRID_OPTION)
+  if(GEN_ANALYZE_GRID)
+    set(ANALYZE_GRID_FILE "${GEN_INIFILE}.csv")
+    set(ANALYZE_GRID_OPTION "--grid-info=${ANALYZE_GRID_FILE}")
+    add_custom_command(OUTPUT ${ANALYZE_GRID_FILE}
+                       COMMAND ${CMAKE_BINARY_DIR}/bin/analyzegrid/analyze_grid ${GEN_INIFILE} ${ANALYZE_GRID_FILE}
+                       COMMENT "Analyzing the grid for target ${GEN_TARGET}..."
+                       )
+  endif()
+
   # Parse a mapping of operators to build and their respective filenames
   dune_execute_process(COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env python ${perftool_path}/deplist.py ${GEN_INIFILE} ${GEN_TARGET}
                        OUTPUT_VARIABLE depdata
@@ -132,7 +144,8 @@ function(add_generated_executable)
                                --ini-file ${GEN_INIFILE}
                                --target-name ${GEN_TARGET}
                                --operator-to-build ${op}
-                       DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_PERFTOOL_ADDITIONAL_PYTHON_SOURCES}
+                               ${ANALYZE_GRID_OPTION}
+                       DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_PERFTOOL_ADDITIONAL_PYTHON_SOURCES} ${ANALYZE_GRID_FILE}
                        COMMENT "Generating operator file ${depdata___${op}} for the target ${GEN_TARGET}"
                        )
     set(header_deps ${header_deps} ${depdata___${op}})
diff --git a/cmake/modules/GeneratedSystemtests.cmake b/cmake/modules/GeneratedSystemtests.cmake
index 2b1d87728c04a9359df79f3711c0df9c4f3f621e..41975df0e51fb575136c0c05510c9fb0eb7b5adc 100644
--- a/cmake/modules/GeneratedSystemtests.cmake
+++ b/cmake/modules/GeneratedSystemtests.cmake
@@ -3,7 +3,7 @@
 
 function(dune_add_formcompiler_system_test)
   # parse arguments
-  set(OPTION DEBUG NO_TESTS)
+  set(OPTION DEBUG NO_TESTS ANALYZE_GRID)
   set(SINGLE INIFILE BASENAME SCRIPT UFLFILE SOURCE)
   set(MULTI CREATED_TARGETS)
   cmake_parse_arguments(SYSTEMTEST "${OPTION}" "${SINGLE}" "${MULTI}" ${ARGN})
@@ -21,6 +21,10 @@ function(dune_add_formcompiler_system_test)
   if(SYSTEMTEST_SOURCE)
     set(SOURCE SOURCE ${SYSTEMTEST_SOURCE})
   endif()
+  set(ANALYZE_GRID_STR "")
+  if(SYSTEMTEST_ANALYZE_GRID)
+    set(ANALYZE_GRID_STR "ANALYZE_GRID")
+  endif()
 
   # set a default for the script. call_executable.py just calls the executable.
   # There, it is also possible to hook in things depending on the inifile
@@ -57,6 +61,7 @@ function(dune_add_formcompiler_system_test)
                              DEPENDS ${SYSTEMTEST_INIFILE}
                              EXCLUDE_FROM_ALL
                              ${SOURCE}
+                             ${ANALYZE_GRID_STR}
                              )
 
     # Enrich the target with preprocessor variables from the __static section
diff --git a/dune.module b/dune.module
index c6a0e7bb6e053b8d274511f4dbb54d9d2eab84c4..1ae974397a480f5e80e4beb88007ae68468c0073 100644
--- a/dune.module
+++ b/dune.module
@@ -8,4 +8,4 @@ Version: 0.0
 Maintainer: dominic.kempf@iwr.uni-heidelberg.de
 #depending on
 Depends: dune-testtools dune-pdelab dune-alugrid
-Suggests: dune-opcounter
+Suggests: dune-opcounter dune-uggrid consistent-edge-orientation
diff --git a/dune/perftool/sumfact/analyzegrid.hh b/dune/perftool/sumfact/analyzegrid.hh
new file mode 100644
index 0000000000000000000000000000000000000000..c646616110b1b3bfb2a29e497eeacb3d58a60dbd
--- /dev/null
+++ b/dune/perftool/sumfact/analyzegrid.hh
@@ -0,0 +1,139 @@
+#ifndef DUNE_PERFTOOL_SUMFACT_ANALYZEGRID_HH
+#define DUNE_PERFTOOL_SUMFACT_ANALYZEGRID_HH
+
+
+#include <dune/pdelab/common/intersectiontype.hh>
+
+
+struct GridInfo{
+  int dim;
+  int number_of_elements;
+  int number_of_skeleton_faces;
+  int number_of_boundary_faces;
+  std::set<std::size_t> skeleton_variants;
+  std::set<std::size_t> boundary_variants;
+
+  void export_grid_info(std::string);
+
+  friend std::ostream & operator<<(std::ostream &os, const GridInfo& grid_info);
+};
+
+void GridInfo::export_grid_info(std::string filename){
+  std::ofstream output_file;
+  output_file.open(filename);
+  for (std::size_t var : skeleton_variants){
+    std::size_t index_s = var / (2 * dim);
+    std::size_t facedir_s = index_s / 2;
+    std::size_t facemod_s = index_s % 2;
+
+    std::size_t index_n = var % (2 * dim);
+    std::size_t facedir_n = index_n / 2;
+    std::size_t facemod_n = index_n % 2;
+
+    output_file << "skeleton" << " "
+                << facedir_s << " "
+                << facemod_s << " "
+                << facedir_n << " "
+                << facemod_n << std::endl;
+  }
+  for (std::size_t var : boundary_variants){
+    std::size_t index_s = var;
+    std::size_t facedir_s = index_s / 2;
+    std::size_t facemod_s = index_s % 2;
+
+    output_file << "boundary" << " "
+                << facedir_s << " "
+                << facemod_s << " "
+                << -1 << " "
+                << -1 << std::endl;
+  }
+  output_file.close();
+}
+
+std::ostream & operator<<(std::ostream &os, const GridInfo& grid_info){
+  return os << "== Print GridInfo" << std::endl
+            << "Dimension: " << grid_info.dim << std::endl
+            << "Number of elements: " << grid_info.number_of_elements << std::endl
+            << std::endl
+            << "Number of skeleton faces: " << grid_info.number_of_skeleton_faces << std::endl
+            << "Number of skeleton variants: " << grid_info.skeleton_variants.size() << std::endl
+            << std::endl
+            << "Number of boundary faces: " << grid_info.number_of_boundary_faces << std::endl
+            << "Number of boundary variants: " << grid_info.boundary_variants.size() << std::endl;
+}
+
+
+template <typename ES>
+void analyze_grid(const ES& es, std::string filename){
+  const int dim = ES::dimension;
+
+  GridInfo grid_info;
+  grid_info.dim = dim;
+  grid_info.number_of_elements = 0;
+  grid_info.number_of_skeleton_faces = 0;
+  grid_info.number_of_boundary_faces = 0;
+
+
+  auto& index_set = es.indexSet();
+  for (const auto& element : elements(es)){
+    grid_info.number_of_elements += 1;
+
+    auto ids = index_set.index(element);
+
+    for (const auto& intersection : intersections(es, element)){
+      auto intersection_data = Dune::PDELab::classifyIntersection(es, intersection);
+      auto intersection_type = std::get<0>(intersection_data);
+      auto& outside_element = std::get<1>(intersection_data);
+
+      switch(intersection_type){
+      case Dune::PDELab::IntersectionType::skeleton :
+      {
+        auto idn = index_set.index(outside_element);
+        bool first_visit = ids > idn;
+        if (first_visit){
+          grid_info.number_of_skeleton_faces += 1;
+
+          size_t facedir_s = intersection.indexInInside() / 2 ;
+          size_t facemod_s = intersection.indexInInside() % 2 ;
+          size_t facedir_n = intersection.indexInOutside() / 2 ;
+          size_t facemod_n = intersection.indexInOutside() % 2 ;
+
+          // std::cout << facedir_s << " "
+          //           << facemod_s << " "
+          //           << facedir_n << " "
+          //           << facemod_n << std::endl;
+
+          size_t variant = intersection.indexInOutside() + 2 * dim * intersection.indexInInside();
+          grid_info.skeleton_variants.insert(variant);
+        }
+
+        break;
+      }
+      case Dune::PDELab::IntersectionType::periodic :
+      {
+        assert (false);
+        break;
+      }
+      case Dune::PDELab::IntersectionType::boundary :
+      {
+        grid_info.number_of_boundary_faces += 1;
+
+        size_t variant = intersection.indexInInside();
+        grid_info.boundary_variants.insert(variant);
+
+        break;
+      }
+      case Dune::PDELab::IntersectionType::processor :
+      {
+        assert (false);
+        break;
+      }
+      }
+    }
+  }
+
+  grid_info.export_grid_info(filename);
+  std::cout << grid_info << std::endl;
+}
+
+#endif
diff --git a/dune/perftool/sumfact/invertgeometry.hh b/dune/perftool/sumfact/invertgeometry.hh
new file mode 100644
index 0000000000000000000000000000000000000000..a20708e8a2c305a35d642124b3c01a2ad0d387f6
--- /dev/null
+++ b/dune/perftool/sumfact/invertgeometry.hh
@@ -0,0 +1,80 @@
+#ifndef DUNE_PERFTOOL_SUMFACT_INVERTGEOMETRY_HH
+#define DUNE_PERFTOOL_SUMFACT_INVERTGEOMETRY_HH
+
+
+template<typename T>
+inline T invert_and_return_determinant(const T a00, const T a10, const T a01, const T a11, T inverse[4]){
+  T det = a00 * a11 - a10 * a01;
+  assert (std::abs(det) > 1e-12);
+
+  inverse[0] = a11 / det;
+  inverse[1] = -a10 / det;
+  inverse[2] = -a01 / det;
+  inverse[3] = a00 / det;
+
+  return std::abs(det);
+}
+
+
+template<typename T>
+inline T invert_and_return_determinant(const T a00, const T a10, const T a20,
+                                       const T a01, const T a11, const T a21,
+                                       const T a02, const T a12, const T a22,
+                                       T inverse[9]){
+  T t4 = a00 * a11;
+  T t6 = a00 * a12;
+  T t8  = a01 * a10;
+  T t10 = a02 * a10;
+  T t12 = a01 * a20;
+  T t14 = a02 * a20;
+
+  T det = t4 * a22;
+  det -= t6 * a21;
+  det -= t8 * a22;
+  det += t10 * a21;
+  det += t12 * a12;
+  det -= t14 * a11;
+
+  assert (std::abs(det) > 1e-12);
+
+  T t17 = 1.0/det;
+
+  inverse[0] = a11 * a22;
+  inverse[0] -= a12 * a21;
+  inverse[0] *= t17;
+
+  inverse[3] = a02 * a21;
+  inverse[3] -= a01 * a22;
+  inverse[3] *= t17;
+
+  inverse[6] = a01 * a12;
+  inverse[6] -= a02 * a11;
+  inverse[6] *= t17;
+
+  inverse[1] = a12 * a20;
+  inverse[1] -= a10 * a22;
+  inverse[1] *= t17;
+
+  inverse[4] = -t14;
+  inverse[4] += a00 * a22;
+  inverse[4] *= t17;
+
+  inverse[7] = t10 - t6;
+  inverse[7] *= t17;
+
+  inverse[2] = a10 * a21;
+  inverse[2] += a11 * a20;
+  inverse[2] *= t17;
+
+  inverse[5] = t12;
+  inverse[5] -= a00 * a21;
+  inverse[5] *= t17;
+
+  inverse[8] = t4 - t8;
+  inverse[8] *= t17;
+
+  return std::abs(det);
+}
+
+
+#endif
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 054a09df644957474d6b758ab92ad2cdb7827bf8..8b6d3318bc46f180eb7ef4a388a20cbcc65fbbe5 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -16,8 +16,7 @@ dune_python_install_package(PATH ufl)
 dune_python_install_package(PATH .)
 
 dune_python_add_test(NAME pep8-ourcode
-                     COMMAND ${DUNE_PYTHON_VIRTUALENV_EXECUTABLE} -m pytest --pep8
-                     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python/dune/perftool
+                     COMMAND ${DUNE_PYTHON_VIRTUALENV_EXECUTABLE} -m pytest --pep8 ${CMAKE_SOURCE_DIR}/python/dune/perftool
                      )
 
 add_subdirectory(test)
diff --git a/python/dune/perftool/blockstructured/__init__.py b/python/dune/perftool/blockstructured/__init__.py
index 8849491e8e6c31f33f1a1c5d9a1e247122158137..5c1283b2096cf8bcfe55ef107c77d1ece2f8aa5e 100644
--- a/python/dune/perftool/blockstructured/__init__.py
+++ b/python/dune/perftool/blockstructured/__init__.py
@@ -9,7 +9,7 @@ from dune.perftool.pdelab.quadrature import pymbolic_quadrature_position
 from dune.perftool.blockstructured.spaces import lfs_inames
 from dune.perftool.blockstructured.basis import (pymbolic_reference_gradient,
                                                  pymbolic_basis)
-from dune.perftool.blockstructured.geometry import (pymbolic_jacobian_inverse_transposed,
+from dune.perftool.blockstructured.geometry import (pymbolic_jacobian_inverse,
                                                     pymbolic_jacobian_determinant,
                                                     pymbolic_facet_jacobian_determinant,
                                                     to_global)
@@ -55,8 +55,8 @@ class BlockStructuredInterface(PDELabInterface):
     def pymbolic_jacobian_determinant(self):
         return pymbolic_jacobian_determinant()
 
-    def pymbolic_jacobian_inverse_transposed(self, i, j, restriction):
-        return pymbolic_jacobian_inverse_transposed(i, j, restriction)
+    def pymbolic_jacobian_inverse(self, i, j, restriction):
+        return pymbolic_jacobian_inverse(i, j, restriction)
 
     def pymbolic_facet_jacobian_determinant(self):
         return pymbolic_facet_jacobian_determinant()
diff --git a/python/dune/perftool/blockstructured/geometry.py b/python/dune/perftool/blockstructured/geometry.py
index 1ffbdf0417c8261d2f0882b2384ed044af1ddd03..113f5940b0389e215c7fbaf732d812a9226dd26e 100644
--- a/python/dune/perftool/blockstructured/geometry.py
+++ b/python/dune/perftool/blockstructured/geometry.py
@@ -19,7 +19,7 @@ def pymbolic_jacobian_determinant():
 
 
 # scale Jacobian according to the order of the blockstructure
-def pymbolic_jacobian_inverse_transposed(i, j, restriction):
+def pymbolic_jacobian_inverse(i, j, restriction):
     name_jit = get_backend(interface="name_jit", selector=option_switch("constant_transformation_matrix"))(restriction)
     return prim.Product((get_form_option("number_of_blocks"),
                          prim.Subscript(prim.Variable(name_jit), (j, i))))
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index 9661737148258057dbcaef3d775ef6d584ebc881..a28b2bc5beb7b7cb6ac5317797a0122be10bd214 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -39,6 +39,7 @@ class PerftoolGlobalOptionsArray(ImmutableRecord):
     explicit_time_stepping = PerftoolOption(default=False, helpstr="use explicit time stepping")
     exact_solution_expression = PerftoolOption(helpstr="name of the exact solution expression in the ufl file")
     compare_l2errorsquared = PerftoolOption(helpstr="maximal allowed l2 error squared of difference between numerical solution and interpolation of exact solution (NOTE: requires --exact-solution-expression)")
+    grid_info = PerftoolOption(default=None, helpstr="Path to file with information about facedir and facemod variations. This can be used to limit the generation of skeleton kernels.")
     l2error_tree_path = PerftoolOption(default=None, helpstr="Tree pathes that should be considered for l2 error calculation. Default None means we take all of them into account.")
     ini_file = PerftoolOption(helpstr="An inifile to use. A generated driver will be hard-coded to it, a [formcompiler] section will be used as default values to form compiler arguments (use snake case)")
     opcounter = PerftoolOption(default=False, helpstr="Count operations. Note: In this case only operator applications are generated since solving and operator counting does not work. You probably want to set instrumentation level>0.")
@@ -47,6 +48,7 @@ class PerftoolGlobalOptionsArray(ImmutableRecord):
     project_basedir = PerftoolOption(helpstr="The base (build) directory of the dune-perftool project")
     architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl|skylake")
     yaspgrid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.")
+    grid_unstructured = PerftoolOption(default=False, helpstr="Set to true if you want to use an unstructured grid.")
     precision_bits = PerftoolOption(default=64, helpstr="The number of bits for the floating point type")
     overlapping = PerftoolOption(default=False, helpstr="Use an overlapping solver and constraints. You still need to make sure to construct a grid with overlap! The parallel option will be set automatically.")
     operators = PerftoolOption(default="r", helpstr="A comma separated list of operators, each name will be interpreted as a subsection name within the formcompiler section")
@@ -78,6 +80,7 @@ class PerftoolFormOptionsArray(ImmutableRecord):
     constant_transformation_matrix = PerftoolOption(default=False, helpstr="set option if the jacobian of the transformation is constant on a cell")
     fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.")
     sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization")
+    sumfact_regular_jacobians = PerftoolOption(default=False, helpstr="Generate non sum-factorized jacobians (only useful if sumfact is set)")
     vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
     vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model")
     vectorization_not_fully_vectorized_error = PerftoolOption(default=False, helpstr="throw an error if nonquadloop vectorization did not fully vectorize")
diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py
index d3153ca0b8612b5391843a8689fa2c1c9d16bc56..f53b6ff6297efdde79344c49579e466ccd99d817 100644
--- a/python/dune/perftool/pdelab/__init__.py
+++ b/python/dune/perftool/pdelab/__init__.py
@@ -19,7 +19,7 @@ from dune.perftool.pdelab.geometry import (component_iname,
                                            pymbolic_facet_area,
                                            pymbolic_facet_jacobian_determinant,
                                            pymbolic_jacobian_determinant,
-                                           pymbolic_jacobian_inverse_transposed,
+                                           pymbolic_jacobian_inverse,
                                            pymbolic_unit_inner_normal,
                                            pymbolic_unit_outer_normal,
                                            to_global,
@@ -125,8 +125,8 @@ class PDELabInterface(object):
     def pymbolic_jacobian_determinant(self):
         return pymbolic_jacobian_determinant()
 
-    def pymbolic_jacobian_inverse_transposed(self, i, j, restriction):
-        return pymbolic_jacobian_inverse_transposed(i, j, restriction)
+    def pymbolic_jacobian_inverse(self, i, j, restriction):
+        return pymbolic_jacobian_inverse(i, j, restriction)
 
     def pymbolic_unit_inner_normal(self):
         return pymbolic_unit_inner_normal()
diff --git a/python/dune/perftool/pdelab/driver/gridfunctionspace.py b/python/dune/perftool/pdelab/driver/gridfunctionspace.py
index a956855ad0ee827d8f64e6b731071db82a3c851b..a4ae3604c06af34a774ba0494751085fcffc1a9e 100644
--- a/python/dune/perftool/pdelab/driver/gridfunctionspace.py
+++ b/python/dune/perftool/pdelab/driver/gridfunctionspace.py
@@ -3,7 +3,6 @@ from dune.perftool.generation import (include_file,
                                       )
 from dune.perftool.options import (get_form_option,
                                    get_option,
-                                   set_option,
                                    )
 from dune.perftool.pdelab.driver import (FEM_name_mangling,
                                          get_cell,
@@ -51,7 +50,10 @@ def typedef_grid(name):
     dim = get_dimension()
     if isQuadrilateral(get_trial_element().cell()):
         range_type = type_range()
-        if get_option("yaspgrid_offset"):
+        if get_option("grid_unstructured"):
+            gridt = "Dune::UGGrid<{}>".format(dim)
+            include_file("dune/grid/uggrid.hh", filetag="driver")
+        elif get_option("yaspgrid_offset"):
             gridt = "Dune::YaspGrid<{0}, Dune::EquidistantOffsetCoordinates<{1}, {0}>>".format(dim, range_type)
         else:
             gridt = "Dune::YaspGrid<{0}, Dune::EquidistantCoordinates<{1}, {0}>>".format(dim, range_type)
@@ -78,6 +80,13 @@ def define_grid(name):
     include_file("dune/testtools/gridconstruction.hh", filetag="driver")
     ini = name_initree()
     _type = type_grid()
+    # TODO: In principle this is only necessary if we use sum factorization in
+    # one of the operators. So this could be turned off if that is not the case.
+    if isQuadrilateral(get_trial_element().cell()) and get_option("grid_unstructured"):
+        include_file("dune/consistent-edge-orientation/createconsistentgrid.hh", filetag="driver")
+        return ["IniGridFactory<{}> factory({});".format(_type, ini),
+                "std::shared_ptr<{}> grid_nonconsistent = factory.getGrid();".format(_type),
+                "std::shared_ptr<{}> grid = createConsistentGrid(grid_nonconsistent);".format(_type)]
     return ["IniGridFactory<{}> factory({});".format(_type, ini),
             "std::shared_ptr<{}> grid = factory.getGrid();".format(_type)]
 
diff --git a/python/dune/perftool/pdelab/geometry.py b/python/dune/perftool/pdelab/geometry.py
index 80ab0387e9728b2c05c2a214b9e7b2fb35c75ea4..f56ef3dfc554592ce741d8fd1251a90fb40574e7 100644
--- a/python/dune/perftool/pdelab/geometry.py
+++ b/python/dune/perftool/pdelab/geometry.py
@@ -267,7 +267,7 @@ def declare_normal(name, kernel, decl_info):
 
 
 def pymbolic_unit_outer_normal():
-    name = "outer_normal"
+    name = "unit_outer_normal"
     if not get_form_option("diagonal_transformation_matrix"):
         temporary_variable(name, shape=(world_dimension(),), decl_method=declare_normal)
         evaluate_unit_outer_normal(name)
@@ -338,7 +338,8 @@ def define_jacobian_inverse_transposed(name, restriction):
     dim = world_dimension()
     temporary_variable(name, decl_method=define_jacobian_inverse_transposed_temporary(restriction), shape=(dim, dim))
     geo = name_cell_geometry(restriction)
-    pos = get_backend("qp_in_cell", selector=option_switch(("blockstructured", "sumfact")))(restriction)
+    pos = get_backend("qp_in_cell", selector=option_switch("blockstructured"))(restriction)
+
     return quadrature_preamble("{} = {}.jacobianInverseTransposed({});".format(name,
                                                                                geo,
                                                                                str(pos),
@@ -356,10 +357,11 @@ def name_jacobian_inverse_transposed(restriction):
     return name
 
 
-def pymbolic_jacobian_inverse_transposed(i, j, restriction):
+def pymbolic_jacobian_inverse(i, j, restriction):
+    # Calculate jacobian_inverse_transposed
     name = get_backend(interface="name_jit", selector=option_switch("constant_transformation_matrix"))(restriction)
-    # Dune only has JacobianInverseTransposed as a first class citizen,
-    # so we need to switch the indices around!
+
+    # Return jacobian inverse -> (j, i) instead of (i, j)
     return prim.Subscript(prim.Variable(name), (j, i))
 
 
@@ -395,6 +397,7 @@ def define_jacobian_determinant(name):
                                                     geo,
                                                     str(pos),
                                                     )
+
     return quadrature_preamble(code,
                                assignees=frozenset({name}),
                                read_variables=frozenset({get_pymbolic_basename(pos)}),
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 727b7ae35fae4ef86fe1a6884204d4cbf3acfb2f..4a374577055ffe6f3784bb9a9293488e1ac34b71 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -7,7 +7,8 @@ import numpy as np
 
 from dune.perftool.options import (get_form_option,
                                    get_option,
-                                   option_switch)
+                                   option_switch,
+                                   set_form_option)
 from dune.perftool.generation import (backend,
                                       base_class,
                                       class_basename,
@@ -740,7 +741,7 @@ def local_operator_default_settings(operator, form):
 
     # Set some options!
     from dune.perftool.pdelab.driver import isQuadrilateral
-    if isQuadrilateral(form.arguments()[0].ufl_element().cell()):
+    if isQuadrilateral(form.arguments()[0].ufl_element().cell()) and not get_option("grid_unstructured"):
         from dune.perftool.options import set_form_option
         # For Yasp Grids the jacobian of the transformation is diagonal and constant on each cell
         set_form_option('diagonal_transformation_matrix', True)
@@ -840,6 +841,10 @@ def generate_jacobian_kernels(form, original_form):
     operator_kernels = {}
     with global_context(form_type="jacobian"):
         if get_form_option("generate_jacobians"):
+            if get_form_option("sumfact"):
+                was_sumfact = True
+                if get_form_option("sumfact_regular_jacobians"):
+                    set_form_option("sumfact", False)
             for measure in set(i.integral_type() for i in jacform.integrals()):
                 logger.info("generate_jacobian_kernels: measure {}".format(measure))
                 with global_context(integral_type=measure):
@@ -857,6 +862,9 @@ def generate_jacobian_kernels(form, original_form):
                 with global_context(integral_type=it):
                     from dune.perftool.pdelab.signatures import assembly_routine_signature
                     operator_kernels[(it, 'jacobian')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)]
+            if get_form_option("sumfact_regular_jacobians"):
+                if was_sumfact:
+                    set_form_option("sumfact", True)
 
     # Jacobian apply methods for matrix-free computations
     if get_form_option("matrix_free"):
diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py
index 33d3e2d68398f1eda22a98e3d4125df5b99aae96..d3192b6f278bc7e0d050a500da86a09d905a4a7e 100644
--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -61,6 +61,8 @@ class SumFactInterface(PDELabInterface):
         return ret
 
     def quadrature_inames(self):
+        # TODO Is broken at the moment. Remove from interface? See also pdelab interface.
+        assert False
         return quadrature_inames()
 
     def pymbolic_quadrature_weight(self):
@@ -77,20 +79,24 @@ class SumFactInterface(PDELabInterface):
 
     def pymbolic_unit_outer_normal(self):
         from dune.perftool.sumfact.geometry import pymbolic_unit_outer_normal
-        ret, indices = pymbolic_unit_outer_normal(self.visitor.indices)
-        self.visitor.indices = indices
-        return ret
+        return pymbolic_unit_outer_normal(self.visitor)
 
     def pymbolic_unit_inner_normal(self):
         from dune.perftool.sumfact.geometry import pymbolic_unit_inner_normal
-        ret, indices = pymbolic_unit_inner_normal(self.visitor.indices)
-        self.visitor.indices = indices
-        return ret
+        return pymbolic_unit_inner_normal(self.visitor)
 
     def pymbolic_facet_jacobian_determinant(self):
         from dune.perftool.sumfact.geometry import pymbolic_facet_jacobian_determinant
-        return pymbolic_facet_jacobian_determinant()
+        return pymbolic_facet_jacobian_determinant(self.visitor)
 
     def pymbolic_facet_area(self):
         from dune.perftool.sumfact.geometry import pymbolic_facet_area
-        return pymbolic_facet_area()
+        return pymbolic_facet_area(self.visitor)
+
+    def pymbolic_jacobian_inverse(self, i, j, restriction):
+        from dune.perftool.sumfact.geometry import pymbolic_jacobian_inverse
+        return pymbolic_jacobian_inverse(i, j, restriction, self.visitor)
+
+    def pymbolic_jacobian_determinant(self):
+        from dune.perftool.sumfact.geometry import pymbolic_jacobian_determinant
+        return pymbolic_jacobian_determinant(self.visitor)
diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index 4e7c2a6991b1c6bda49862de0aa47167c1b1ea2a..abad6788cb31f90276dbde32e2fb5aa2b6faa3af 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -24,7 +24,6 @@ from dune.perftool.options import (get_form_option,
                                    )
 from dune.perftool.loopy.flatten import flatten_index
 from dune.perftool.loopy.target import type_floatingpoint
-from dune.perftool.sumfact.quadrature import nest_quadrature_loops
 from dune.perftool.pdelab.driver import FEM_name_mangling
 from dune.perftool.pdelab.localoperator import determine_accumulation_space
 from dune.perftool.pdelab.restriction import restricted_name
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index 39bba49e67fe66a4541ac0ae633e6e403be32c52..e9584d1d644ec6ec3c61b2b8cd863e54192b47c9 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -7,7 +7,6 @@ import itertools
 
 from dune.perftool.generation import (backend,
                                       domain,
-                                      get_backend,
                                       get_counted_variable,
                                       get_counter,
                                       get_global_context_value,
@@ -66,7 +65,7 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord):
                                  )
 
     def __repr__(self):
-        return "{}_{}".format(self.coeff_func(self.restriction), self.element_index)
+        return ImmutableRecord.__repr__(self)
 
     def __str__(self):
         return repr(self)
diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py
index 0f0f25e8635fd2e6cd89d6e21d811bc7c2f47f9d..c4471ee3ab329eef051bfd1a27f0eb0fce1bd4c3 100644
--- a/python/dune/perftool/sumfact/geometry.py
+++ b/python/dune/perftool/sumfact/geometry.py
@@ -3,53 +3,99 @@
 from dune.perftool.generation import (backend,
                                       class_member,
                                       domain,
-                                      get_backend,
                                       get_counted_variable,
                                       iname,
+                                      include_file,
                                       instruction,
                                       kernel_cached,
                                       preamble,
+                                      silenced_warning,
                                       temporary_variable,
+                                      get_global_context_value,
                                       globalarg,
+                                      valuearg,
                                       )
+from dune.perftool.options import get_option
 from dune.perftool.pdelab.geometry import (local_dimension,
                                            world_dimension,
+                                           name_cell_geometry,
                                            name_geometry,
                                            )
-from dune.perftool.sumfact.switch import get_facedir
-from dune.perftool.sumfact.symbolic import SumfactKernelInterfaceBase
+from dune.perftool.pdelab.localoperator import (name_ansatz_gfs_constructor_param,
+                                                lop_template_ansatz_gfs,
+                                                lop_template_range_field,
+                                                )
+from dune.perftool.pdelab.restriction import restricted_name
+from dune.perftool.sumfact.basis import construct_basis_matrix_sequence
+from dune.perftool.sumfact.quadrature import (additional_inames,
+                                              default_quadrature_inames)
+from dune.perftool.sumfact.realization import (name_buffer_storage,
+                                               realize_sum_factorization_kernel,
+                                               )
+from dune.perftool.sumfact.switch import get_facedir, get_facemod
+from dune.perftool.sumfact.symbolic import SumfactKernelInterfaceBase, SumfactKernel
+from dune.perftool.sumfact.tabulation import (quadrature_points_per_direction,
+                                              BasisTabulationMatrix,
+                                              )
 from dune.perftool.sumfact.vectorization import attach_vectorization_info
+from dune.perftool.tools import get_pymbolic_basename
 from dune.perftool.options import get_form_option, option_switch
 from dune.perftool.ufl.modified_terminals import Restriction
 
 from pytools import ImmutableRecord
 
+from loopy.match import Writes
+
 import pymbolic.primitives as prim
 import numpy as np
 
 
 @iname
-def corner_iname():
-    name = get_counted_variable("corneriname")
-    domain(name, 2 ** local_dimension())
+def global_corner_iname(restriction):
+    name = get_counted_variable(restricted_name("global_corneriname", restriction))
+    domain(name, 2 ** world_dimension())
     return name
 
 
 class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord):
-    def __init__(self, dir):
-        ImmutableRecord.__init__(self, dir=dir)
+    def __init__(self, direction, restriction):
+        """Base class for sum-factorized evaluation of geometry mappings
+
+        At the moment we only do this for cells and not faces. For
+        intersections we do this corresponding reference elements of the
+        neigboring cells.
+
+        Each spatial component needs a seperate sum factorization kernel.  The
+        argument 'direction' specifies the component (x-component: 0,
+        y-component: 1, z-component: 2).
+        """
+        ImmutableRecord.__init__(self, direction=direction, restriction=restriction)
+
+    @property
+    def stage(self):
+        return 1
+
+    def __repr__(self):
+        return ImmutableRecord.__repr__(self)
 
-    def realize(self, sf, index, insn_dep):
-        from dune.perftool.sumfact.realization import name_buffer_storage
+    def realize(self, sf, insn_dep):
+        # TODO: When we use vectorization this should be the offset
+        assert(get_form_option('vectorization_strategy') == 'none')
+        index = 0
+
+        # Note: world_dimension, since we only do evaluation of cell geometry mappings
         name = "input_{}".format(sf.buffer)
         temporary_variable(name,
-                           shape=(2 ** local_dimension(), sf.vector_width),
+                           shape=(2 ** world_dimension(), sf.vector_width),
                            custom_base_storage=name_buffer_storage(sf.buffer, 0),
                            managed=True,
                            )
+        ciname = global_corner_iname(self.restriction)
 
-        ciname = corner_iname()
-        geo = name_geometry()
+        if self.restriction == 0:
+            geo = name_geometry()
+        else:
+            geo = name_cell_geometry(self.restriction)
 
         # NB: We need to realize this as a C instruction, because the corner
         #     method does return a non-scalar, which does not fit into the current
@@ -61,35 +107,47 @@ class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord):
                                                           index,
                                                           geo,
                                                           ciname,
-                                                          self.dir,
+                                                          self.direction,
                                                           )
 
-        instruction(code=code,
-                    within_inames=frozenset({ciname}),
-                    assignees=(name,),
-                    tags=frozenset({"sumfact_stage{}".format(sf.stage)}),
-                    )
+        insn = instruction(code=code,
+                           within_inames=frozenset({ciname}),
+                           assignees=(name,),
+                           tags=frozenset({"sumfact_stage{}".format(sf.stage)}),
+                           )
+
+        return insn_dep.union(frozenset({insn}))
 
 
 @backend(interface="spatial_coordinate", name="default")
 def pymbolic_spatial_coordinate_multilinear(do_predicates, visitor):
+    """Calculate global coordinates of quadrature points for multilinear geometry mappings
+
+    The evalualation is done using sum factorization techniques.
+
+    On facets we use the geometry mapping of the self cell to get a global
+    evaluation of the quadrature points. Avoiding the geometry mapping of the
+    face itself greatly simplifies the generation of code for unstructured
+    grids. Instead of looking at all the possible embeddings of the reference
+    element of the face into the reference element of the cell we can make the
+    grid edge consistent and choose a suitable convention for the order of
+    directions in our sum factorization kernels.
+    """
     assert len(visitor.indices) == 1
 
-    # Construct the matrix sequence for the evaluation of the global coordinate.
-    # We need to manually construct this one, because on facets, we want to use the
-    # geometry embedding of the facet into the global space directly without going
-    # through the neighboring cell geometries. That matrix sequence will only have
-    # dim-1 matrices!
-    from dune.perftool.sumfact.tabulation import quadrature_points_per_direction, BasisTabulationMatrix
-    quadrature_size = quadrature_points_per_direction()
-    matrix_sequence = tuple(BasisTabulationMatrix(direction=i, basis_size=2) for i in range(world_dimension()) if i != get_facedir(visitor.restriction))
-    inp = GeoCornersInput(visitor.indices[0])
-
-    from dune.perftool.sumfact.symbolic import SumfactKernel
+    # Adjust restriction for boundary facets
+    restriction = visitor.restriction
+    if get_global_context_value("integral_type") == "exterior_facet":
+        restriction = 1
+
+    # Generate sum factorization kernel and add vectorization info
+    matrix_sequence = construct_basis_matrix_sequence(facedir=get_facedir(restriction),
+                                                      facemod=get_facemod(restriction),
+                                                      basis_size=(2,) * world_dimension())
+    inp = GeoCornersInput(visitor.indices[0], restriction)
     sf = SumfactKernel(matrix_sequence=matrix_sequence,
                        interface=inp,
                        )
-
     vsf = attach_vectorization_info(sf)
 
     # Add a sum factorization kernel that implements the evaluation of
@@ -97,7 +155,10 @@ def pymbolic_spatial_coordinate_multilinear(do_predicates, visitor):
     from dune.perftool.sumfact.realization import realize_sum_factorization_kernel
     var, _ = realize_sum_factorization_kernel(vsf)
 
+    # The results of this function is already the right component of the
+    # spatial coordinate and we don't need to index this in the visitor
     visitor.indices = None
+
     return prim.Subscript(var, vsf.quadrature_index(sf, visitor))
 
 
@@ -152,7 +213,6 @@ def pymbolic_spatial_coordinate_axiparallel(do_predicates, visitor):
     # Urgh: *SOMEHOW* construct a face direction
     from dune.perftool.pdelab.restriction import Restriction
     restriction = Restriction.NONE
-    from dune.perftool.generation import get_global_context_value
     if get_global_context_value("integral_type") == "interior_facet":
         restriction = Restriction.POSITIVE
     from dune.perftool.sumfact.switch import get_facedir
@@ -180,69 +240,171 @@ def pymbolic_spatial_coordinate_axiparallel(do_predicates, visitor):
     return prim.Subscript(prim.Variable(lowcorner), (index,)) + x * prim.Subscript(prim.Variable(meshwidth), (index,))
 
 
-def pymbolic_unit_outer_normal(visitor_indices):
-    index, = visitor_indices
+def define_outer_normal(name, visitor):
+    """Calculate outer normal (length not necessarily 1)
+
+    This will be used to calculate the unit outer normal and the determinant of
+    the Jacobian of the geometry mapping of the face.
+
+    The outer normal is calculated by multiplying the Jacobian inverse
+    transposed of the geometry mapping of the 'self'-cell with the unit outer
+    normal \hat{n_s} of the face corresponding to the intersetcion in the
+    reference element of the 'self'-cell:
+
+    n = (\nabla \mu_{T_s})^{-T} \hat{n_s}
+
+    Instead of setting up \hat{n_s} we just look at facedir_s and facemod_s.
+    """
+    facedir_s = get_facedir(Restriction.POSITIVE)
+    facemod_s = get_facemod(Restriction.POSITIVE)
+    temporary_variable(name, shape=(world_dimension(),))
+    for i in range(world_dimension()):
+        assignee = prim.Subscript(prim.Variable(name), i)
+        jit = pymbolic_jacobian_inverse(i, facedir_s, Restriction.POSITIVE, visitor)
+
+        # Note: 2*facemod_s-1 because of
+        # -1 if facemod_s = 0
+        # +1 if facemod_s = 1
+        expression = jit * (2 * facemod_s - 1)
+
+        inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+        instruction(assignee=assignee,
+                    expression=expression,
+                    within_inames=frozenset(inames),
+                    )
+
+
+def name_outer_normal(visitor):
+    name = "outer_normal"
+    define_outer_normal(name, visitor)
+    return name
+
+
+def pymbolic_outer_normal(visitor):
+    name = name_outer_normal(visitor)
+    return prim.Variable(name)
+
+
+def define_norm_of_outer_normal(name, visitor):
+    outer_normal = pymbolic_outer_normal(visitor)
+
+    # Calculate the norm of outer_normal
+    temporary_variable(name, shape=())
+    expression = 0
+    for i in range(world_dimension()):
+        expression = prim.Sum((expression, prim.Subscript(outer_normal, i) * prim.Subscript(outer_normal, i)))
+    expression = prim.Call(prim.Variable("sqrt"), (expression,))
+    inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+
+    # Apparently the call of sqrt shadows the dependency on outer normal. Add manually.
+    instruction(assignee=prim.Variable(name),
+                expression=expression,
+                depends_on=frozenset({Writes(get_pymbolic_basename(outer_normal))}),
+                within_inames=frozenset(inames),
+                )
+
+
+def name_norm_of_outer_normal(visitor):
+    name = "norm_of_outer_normal"
+    define_norm_of_outer_normal(name, visitor)
+    return name
+
+
+def pymbolic_norm_of_outer_normal(visitor):
+    name = name_norm_of_outer_normal(visitor)
+    return prim.Variable(name)
+
+
+def pymbolic_unit_outer_normal(visitor):
+    assert (len(visitor.indices) is 1)
+    index = visitor.indices[0]
     assert isinstance(index, int)
     if get_form_option("diagonal_transformation_matrix"):
+        # In this case we just return -1, 0 or 1 depending on the current
+        # facemod and facedir. We don't need to (and cannot) index these ints
+        # so we set the indices of the visitor to None.
+        visitor.indices = None
+
+        # Use facemod_s and facedir_s
         from dune.perftool.sumfact.switch import get_facedir, get_facemod
         if index == get_facedir(Restriction.POSITIVE):
             if get_facemod(Restriction.POSITIVE):
-                return 1, None
+                return 1
             else:
-                return -1, None
+                return -1
         else:
-            return 0, None
+            return 0
     else:
-        from dune.perftool.pdelab.geometry import pymbolic_unit_outer_normal as _norm
-        return _norm(), visitor_indices
+        # In pymbolic_outer_normal we need to create the
+        # jacobian_inverse_transposed which doesn't play nicely with the
+        # visitor having indices. Maybe this should be improved. For now let's
+        # just set them to None and restore them afterwards.
+        visitor.indices = None
+        outer_normal = pymbolic_outer_normal(visitor)
+        norm = pymbolic_norm_of_outer_normal(visitor)
+        visitor.indices = (index,)
+
+        # Create unit_outer_normal by scaling outer_normal to length 1
+        name = "unit_outer_normal"
+        temporary_variable(name, shape=(world_dimension(),))
+        inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+        for i in range(world_dimension()):
+            instruction(assignee=prim.Subscript(prim.Variable(name), i),
+                        expression=prim.Subscript(outer_normal, i) / norm,
+                        within_inames=frozenset(inames),
+                        )
+
+        return prim.Variable(name)
 
 
 def pymbolic_unit_inner_normal(visitor_indices):
-    index, = visitor_indices
+    # Note: The implementation of this was adjusted for unstructured grids but
+    # it was not properly tested. If you need the unit inner normal just remove
+    # the assert(False) statement but make sure to verify that this does the
+    # right thing.
+    assert(False)
+
+    assert (len(visitor.indices) is 1)
+    index = visitor.indices[0]
     assert isinstance(index, int)
     if get_form_option("diagonal_transformation_matrix"):
-        from dune.perftool.sumfact.switch import get_facedir, get_facemod
+        # In this case we just return -1, 0 or 1 depending on the current
+        # facemod and facedir. We don't need to (and cannot) index these ints
+        # so we set the indices of the visitor to None.
+        visitor.indices = None
         if index == get_facedir(Restriction.POSITIVE):
             if get_facemod(Restriction.POSITIVE):
-                return -1, None
+                return -1
             else:
-                return 1, None
+                return 1
         else:
-            return 0, None
+            return 0
     else:
-        from dune.perftool.pdelab.geometry import pymbolic_unit_inner_normal as _norm
-        return _norm(), visitor_indices
+        # In pymbolic_outer_normal we need to create the
+        # jacobian_inverse_transposed which doesn't play nicely with the
+        # visitor having indices. Maybe this should be improved. For now let's
+        # just set them to None and restore them afterwards.
+        visitor.indices = None
+        outer_normal = pymbolic_outer_normal(visitor)
+        norm = pymbolic_norm_of_outer_normal(visitor)
+        visitor.indices = (index,)
 
+        # Create unit_outer_normal by scaling outer_normal to length 1
+        name = "unit_outer_normal"
+        temporary_variable(name, shape=(world_dimension(),))
 
-def pymbolic_facet_jacobian_determinant():
-    if get_form_option("constant_transformation_matrix"):
-        return pymbolic_constant_facet_jacobian_determinant()
-    else:
-        from dune.perftool.pdelab.geometry import pymbolic_facet_jacobian_determinant as _norm
-        return _norm()
+        inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+        for i in range(world_dimension()):
+            instruction(assignee=prim.Subscript(prim.Variable(name), i),
+                        expression=-1 * prim.Subscript(outer_normal, i) / norm,
+                        within_inames=frozenset(inames),
+                        )
 
-
-def pymbolic_constant_facet_jacobian_determinant():
-    facedir = get_facedir(Restriction.POSITIVE)
-    assert isinstance(facedir, int)
-
-    name = "fdetjac"
-    define_constant_facet_jacobian_determinant(name)
-    globalarg(name, shape=(world_dimension(),))
-    return prim.Subscript(prim.Variable(name), (facedir,))
-
-
-@class_member(classtag="operator")
-def define_constant_facet_jacobian_determinant(name):
-    define_constant_facet_jacobian_determinant_eval(name)
-    from dune.perftool.pdelab.localoperator import lop_template_ansatz_gfs
-    gfst = lop_template_ansatz_gfs()
-    return "std::array<typename {}::Traits::GridView::template Codim<0>::Geometry::ctype, {}> {};".format(gfst, world_dimension(), name)
+        return prim.Variable(name)
 
 
 @kernel_cached(kernel="operator")
 def define_constant_facet_jacobian_determinant_eval(name):
-    from dune.perftool.pdelab.localoperator import name_ansatz_gfs_constructor_param, lop_template_range_field
     gfs = name_ansatz_gfs_constructor_param()
     rft = lop_template_range_field()
     code = ["{",
@@ -257,9 +419,196 @@ def define_constant_facet_jacobian_determinant_eval(name):
                 )
 
 
-def pymbolic_facet_area():
+@class_member(classtag="operator")
+def define_constant_facet_jacobian_determinant(name):
+    define_constant_facet_jacobian_determinant_eval(name)
+    gfst = lop_template_ansatz_gfs()
+    return "std::array<typename {}::Traits::GridView::template Codim<0>::Geometry::ctype, {}> {};".format(gfst, world_dimension(), name)
+
+
+def pymbolic_constant_facet_jacobian_determinant():
+    facedir = get_facedir(Restriction.POSITIVE)
+    assert isinstance(facedir, int)
+
+    name = "fdetjac"
+    define_constant_facet_jacobian_determinant(name)
+    globalarg(name, shape=(world_dimension(),))
+    return prim.Subscript(prim.Variable(name), (facedir,))
+
+
+def define_facet_jacobian_determinant(name, visitor):
+    """Absolute value of determinant of jacobian of facet geometry mapping
+
+    Calculate the absolute value of the determinant of the jacobian of the
+    geometry mapping from the reference cell of the intersection to the
+    intersection:
+
+    |\det(\nabla \mu_F)|
+
+    This is done using the surface metric tensor lemma from the lecture notes
+    of Jean-Luc Guermond:
+
+    |\det(\nabla \mu_F)| = \| \tilde{n} \| |\det(\nabla \mu_{T_s})|
+
+    Here \tilde{n} is the outer normal defined by:
+
+    \tilde{n} = (\nabla \mu_{T_s})^{-T} \hat{n}_s
+
+    where \hat{n}_s is the unit outer normal of the corresponding face of the
+    reference cell.
+    """
+    # Calculate norm of outer normal and determinant of jacobian of geometry
+    # mapping of the inside cell -> set restriction to 1 for boundary facets
+    if get_global_context_value("integral_type") == "exterior_facet":
+        assert visitor.restriction is 0
+        visitor.restriction = 1
+    norm_of_outer_normal = pymbolic_norm_of_outer_normal(visitor)
+    detjac = pymbolic_jacobian_determinant(visitor)
+    if get_global_context_value("integral_type") == "exterior_facet":
+        visitor.restriction = 0
+
+    # Multiply detjac and norm_of_outer_normal
+    temporary_variable(name, shape=())
+    inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+    instruction(assignee=prim.Variable(name),
+                expression=detjac * norm_of_outer_normal,
+                within_inames=frozenset(inames),
+                )
+
+
+def name_facet_jacobian_determinant(visitor):
+    name = "fdetjac"
+    define_facet_jacobian_determinant(name, visitor)
+    return name
+
+
+def pymbolic_facet_jacobian_determinant(visitor):
+    if get_form_option("constant_transformation_matrix"):
+        return pymbolic_constant_facet_jacobian_determinant()
+    else:
+        name = name_facet_jacobian_determinant(visitor)
+        return prim.Variable(name)
+
+
+def pymbolic_facet_area(visitor):
     if get_form_option("constant_transformation_matrix"):
-        return pymbolic_facet_jacobian_determinant()
+        return pymbolic_facet_jacobian_determinant(visitor)
     else:
-        from dune.perftool.pdelab.geometry import pymbolic_facet_area as _norm
-        return _norm()
+        from dune.perftool.pdelab.geometry import pymbolic_facet_area as nonconstant_facet_area
+        return nonconstant_facet_area()
+
+
+def _name_jacobian(i, j, restriction, visitor):
+    """Return the (i, j) component of the jacobian of the geometry mapping
+
+    Evaluation of the derivative of the geometry mapping is done using sum
+    factorization.
+
+    Note: At the moment this only works for the mappings from reference cells
+    to the cell and not for the geometry mappings of intersections.
+    """
+    do_predicates = visitor.do_predicates
+
+    # Create matrix sequence with derivative in j direction
+    matrix_sequence = construct_basis_matrix_sequence(derivative=j,
+                                                      facedir=get_facedir(restriction),
+                                                      facemod=get_facemod(restriction),
+                                                      basis_size=(2,) * world_dimension())
+
+    # Sum factorization input for the i'th component of the geometry mapping
+    inp = GeoCornersInput(i, restriction)
+
+    sf = SumfactKernel(matrix_sequence=matrix_sequence,
+                       interface=inp,
+                       )
+    vsf = attach_vectorization_info(sf)
+
+    # Add a sum factorization kernel that implements the evaluation of
+    # the basis functions at quadrature points (stage 1)
+    var, _ = realize_sum_factorization_kernel(vsf)
+
+    assert(visitor.indices is None)
+    return prim.Subscript(var, vsf.quadrature_index(sf, visitor))
+
+
+def define_jacobian_inverse(name, restriction, visitor):
+    """Return jacobian inverse transposed of the geometry mapping (of a cell)
+
+    At the moment this only works for geometry mappings of cells and not for
+    intersection. We only consider this case as it greatly simplifies code
+    generation for unstructured grids by making the grid edge consistent and
+    avoiding the face-twist problem.
+    """
+    # Calculate the jacobian of the geometry mapping using sum factorization
+    dim = world_dimension()
+    names_jacobian = []
+    for j in range(dim):
+        for i in range(dim):
+            name_jacobian = restricted_name("jacobian_geometry_{}{}".format(i, j), restriction)
+            temporary_variable(name_jacobian)
+            assignee = prim.Variable(name_jacobian)
+            expression = _name_jacobian(i, j, restriction, visitor)
+            inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+            instruction(assignee=assignee,
+                        expression=expression,
+                        within_inames=frozenset(inames),
+                        )
+            names_jacobian.append(name_jacobian)
+
+    # Calculate the inverse of the jacobian of the geometry mapping and the
+    # determinant by calling a c++ function
+    name_detjac = name_jacobian_determinant(visitor)
+    temporary_variable(name_detjac, shape=())
+    temporary_variable(name, shape=(dim, dim), managed=True)
+    include_file('dune/perftool/sumfact/invertgeometry.hh', filetag='operatorfile')
+    code = "{} = invert_and_return_determinant({}, {});".format(name_detjac,
+                                                                ", ".join(names_jacobian),
+                                                                name)
+    silenced_warning("read_no_write({})".format(name_detjac))
+    inames = default_quadrature_inames(visitor) + additional_inames(visitor)
+    return instruction(code=code,
+                       assignees=frozenset([name, name_detjac]),
+                       read_variables=frozenset(names_jacobian),
+                       inames=frozenset(inames),
+                       )
+
+
+def name_jacobian_inverse(restriction, visitor):
+    name = restricted_name("jacobian_inverse", restriction)
+    define_jacobian_inverse(name, restriction, visitor)
+    return name
+
+
+def pymbolic_jacobian_inverse(i, j, restriction, visitor):
+    # Switch between implementations without using the backend mechanism. In
+    # the case of constant_transformation_matrix we want to use the pdelab
+    # branch, otherwise we want to go into the sumfact implementation instead
+    # of pdelab.
+    if get_form_option("constant_transformation_matrix"):
+        from dune.perftool.pdelab.geometry import name_constant_jacobian_inverse_transposed
+        name = name_constant_jacobian_inverse_transposed(restriction)
+        # Return jacobian inverse -> (j, i) instead of (i, j)
+        return prim.Subscript(prim.Variable(name), (j, i))
+    else:
+        name = name_jacobian_inverse(restriction, visitor)
+        return prim.Subscript(prim.Variable(name), (i, j))
+
+
+def name_jacobian_determinant(visitor):
+    name = "detjac"
+    return name
+
+
+def pymbolic_jacobian_determinant(visitor):
+    if get_form_option("constant_transformation_matrix"):
+        from dune.perftool.pdelab.geometry import pymbolic_jacobian_determinant
+        return pymbolic_jacobian_determinant()
+    else:
+        # The calculation of the jacobian determinant happens in this function
+        if visitor.measure == 'cell':
+            restriction = 0
+        else:
+            restriction = 1
+        name_jacobian_inverse(restriction, visitor)
+
+        return prim.Variable(name_jacobian_determinant(visitor))
diff --git a/python/dune/perftool/sumfact/quadrature.py b/python/dune/perftool/sumfact/quadrature.py
index fdd359578cec007e0bfe0ac9fa52fe7783afa0fc..6fe70fa441979c4ae00a5f44acc8136def9f5fb0 100644
--- a/python/dune/perftool/sumfact/quadrature.py
+++ b/python/dune/perftool/sumfact/quadrature.py
@@ -2,13 +2,15 @@ from dune.perftool.generation import (backend,
                                       domain,
                                       function_mangler,
                                       get_global_context_value,
+                                      globalarg,
                                       iname,
                                       instruction,
                                       kernel_cached,
                                       loopy_class_member,
+                                      preamble,
                                       temporary_variable,
                                       )
-
+from dune.perftool.sumfact.switch import get_facedir
 from dune.perftool.sumfact.tabulation import (quadrature_points_per_direction,
                                               local_quadrature_points_per_direction,
                                               name_oned_quadrature_points,
@@ -37,20 +39,6 @@ import loopy as lp
 import numpy as np
 
 
-def nest_quadrature_loops(kernel, inames):
-    insns = []
-    for insn in kernel.instructions:
-        if "quad" in insn.tags:
-            insns.append(insn.copy(within_inames=insn.within_inames.union(frozenset(inames)),
-                                   tags=insn.tags - frozenset({"quad"})
-                                   )
-                         )
-        else:
-            insns.append(insn)
-
-    return kernel.copy(instructions=insns)
-
-
 class BaseWeight(FunctionIdentifier):
     def __init__(self, accumobj):
         self.accumobj = accumobj
@@ -78,7 +66,15 @@ def pymbolic_base_weight():
     return 1.0
 
 
-@backend(interface="quad_inames", name="sumfact")
+def default_quadrature_inames(visitor):
+    info = visitor.current_info[1]
+    if info is None:
+        element = None
+    else:
+        element = info.element
+    return quadrature_inames(element)
+
+
 @kernel_cached
 def quadrature_inames(element):
     if element is None:
@@ -176,12 +172,14 @@ def quadrature_weight(visitor):
         element = None
     else:
         element = info.element
-    return prim.Subscript(lp.symbolic.TaggedVariable(name, "operator_precomputed"), tuple(prim.Variable(i) for i in quadrature_inames(element)))
+    return prim.Subscript(lp.symbolic.TaggedVariable(name, "operator_precomputed"),
+                          tuple(prim.Variable(i) for i in quadrature_inames(element)))
 
 
-def define_quadrature_position(name, index):
+def define_quadrature_position(name, local_index):
     qps_per_dir = quadrature_points_per_direction()
-    qp_bound = qps_per_dir[index]
+    local_qps_per_dir = local_quadrature_points_per_direction()
+    qp_bound = local_qps_per_dir[index]
     expression = Subscript(Variable(name_oned_quadrature_points(qp_bound)),
                            (Variable(quadrature_inames()[index]),))
     instruction(expression=expression,
@@ -192,20 +190,29 @@ def define_quadrature_position(name, index):
                 )
 
 
-def pymbolic_indexed_quadrature_position(index, visitor):
+def pymbolic_indexed_quadrature_position(local_index, visitor):
+    """Return the value of the indexed local quadrature points
+
+    Arguments:
+    ----------
+
+    local_index: Local index to the quadrature point. Local means that on the
+        face of a 3D Element the index always will be 0 or 1 but never 2.
+    visitor: UFL tree visitor
+    """
     # Return the non-precomputed version
     if not get_form_option("precompute_quadrature_info"):
         name = 'pos'
         temporary_variable(name, shape=(local_dimension(),), shape_impl=("fv",))
-        define_quadrature_position(name, index)
-        return prim.Subscript(prim.Variable(name), (index,))
+        define_quadrature_position(name, local_index)
+        return prim.Subscript(prim.Variable(name), (local_index,))
 
-    assert isinstance(index, int)
+    assert isinstance(local_index, int)
     dim = local_dimension()
     qps_per_dir = quadrature_points_per_direction()
     local_qps_per_dir = local_quadrature_points_per_direction()
     local_qps_per_dir_str = '_'.join(map(str, local_qps_per_dir))
-    name = "quad_points_dim{}_num{}_dir{}".format(dim, local_qps_per_dir_str, index)
+    name = "quad_points_dim{}_num{}_localdir{}".format(dim, local_qps_per_dir_str, local_index)
 
     loopy_class_member(name,
                        shape=local_qps_per_dir,
@@ -218,8 +225,8 @@ def pymbolic_indexed_quadrature_position(index, visitor):
     # Precompute it in the constructor
     inames = constructor_quadrature_inames(dim)
     assignee = prim.Subscript(prim.Variable(name), tuple(prim.Variable(i) for i in inames))
-    expression = Subscript(Variable(name_oned_quadrature_points(local_qps_per_dir[index])),
-                           (prim.Variable(inames[index])))
+    expression = Subscript(Variable(name_oned_quadrature_points(local_qps_per_dir[local_index])),
+                           (prim.Variable(inames[local_index])))
     instruction(assignee=assignee,
                 expression=expression,
                 within_inames=frozenset(inames),
@@ -234,10 +241,22 @@ def pymbolic_indexed_quadrature_position(index, visitor):
     return prim.Subscript(lp.symbolic.TaggedVariable(name, "operator_precomputed"), tuple(prim.Variable(i) for i in quadrature_inames(element)))
 
 
-@backend(interface="qp_in_cell", name="sumfact")
-def pymbolic_quadrature_position_in_cell(restriction):
-    # TODO: This code path is broken at the moment.
-    assert False
+def additional_inames(visitor):
+    """Return inames for iterating over ansatz space in the case of jacobians
+    """
+    info = visitor.current_info[1]
+    if info is None:
+        element = None
+    else:
+        element = info.element
 
-    from dune.perftool.pdelab.geometry import to_cell_coordinates
-    return to_cell_coordinates(pymbolic_quadrature_position(), restriction)
+    if element is not None:
+        from dune.perftool.pdelab.restriction import Restriction
+        restriction = Restriction.NONE
+        if get_global_context_value("integral_type") == "interior_facet":
+            restriction = Restriction.POSITIVE
+        from dune.perftool.sumfact.basis import lfs_inames
+        lfs_inames = lfs_inames(element, restriction)
+        return lfs_inames
+    else:
+        return ()
diff --git a/python/dune/perftool/sumfact/switch.py b/python/dune/perftool/sumfact/switch.py
index 8c6a0f13ace27b6e3081030607149b7b003aa82c..8a44d50923226b7971c46389d245760e587fde89 100644
--- a/python/dune/perftool/sumfact/switch.py
+++ b/python/dune/perftool/sumfact/switch.py
@@ -1,5 +1,7 @@
 """ boundary and skeleton integrals come in variants in sum factorization - implement the switch! """
 
+import csv
+
 from dune.perftool.generation import (backend,
                                       get_global_context_value,
                                       global_context,
@@ -10,7 +12,7 @@ from dune.perftool.pdelab.signatures import (assembly_routine_args,
                                              assembly_routine_signature,
                                              kernel_name,
                                              )
-from dune.perftool.options import get_form_option
+from dune.perftool.options import get_form_option, get_option
 from dune.perftool.cgen.clazz import ClassMember
 
 
@@ -54,7 +56,21 @@ def get_kernel_name(facedir_s=None, facemod_s=None, facedir_n=None, facemod_n=No
 def decide_if_kernel_is_necessary(facedir_s, facemod_s, facedir_n, facemod_n):
     # If we are not using YaspGrid, all variants need to be realized
     if not get_form_option("diagonal_transformation_matrix"):
-        return True
+        # Reduce the variability according to grid info file
+        if get_option("grid_info") is not None:
+            filename = get_option("grid_info")
+            with open(filename) as csv_file:
+                csv_reader = csv.reader(csv_file, delimiter=" ")
+                for row in csv_reader:
+                    if (row[0] == 'skeleton' and
+                            facedir_s == int(row[1])) and \
+                            (facemod_s == int(row[2])) and \
+                            (facedir_n == int(row[3])) and \
+                            (facemod_n == int(row[4])):
+                        return True
+                return False
+        else:
+            return True
 
     # The PDELab machineries visit-once policy combined with Yasp avoids any visits
     # with facemod_s being True
@@ -98,6 +114,7 @@ def generate_exterior_facet_switch():
                                                                               ),
                                                               args))
 
+    block.append('    default: DUNE_THROW(Dune::Exception, "Variation not implemented.");')
     block.append("  }")
     block.append("}")
 
@@ -130,6 +147,7 @@ def generate_interior_facet_switch():
                                                                                           ),
                                                                           args))
 
+    block.append('    default: DUNE_THROW(Dune::Exception, "Variation not implemented.");')
     block.append("  }")
     block.append("}")
 
diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py
index 99107c9cd8f6a4429965985fb8fbbdcbd3e898e9..de13c45c3105e8c0e7411d1d849f7f55f4629e96 100644
--- a/python/dune/perftool/sumfact/tabulation.py
+++ b/python/dune/perftool/sumfact/tabulation.py
@@ -20,6 +20,7 @@ from dune.perftool.generation import (class_member,
                                       )
 from dune.perftool.loopy.target import dtype_floatingpoint
 from dune.perftool.loopy.vcl import ExplicitVCLCast, get_vcl_type_size
+from dune.perftool.options import get_option
 from dune.perftool.pdelab.localoperator import (name_domain_field,
                                                 lop_template_range_field,
                                                 )
@@ -273,8 +274,13 @@ def local_quadrature_points_per_direction():
     qps_per_dir = quadrature_points_per_direction()
     if local_dimension() != world_dimension():
         facedir = get_global_context_value('facedir_s')
-        assert(get_global_context_value('facedir_s') == get_global_context_value('facedir_n') or
-               get_global_context_value('integral_type') == 'exterior_facet')
+        if not get_option("grid_unstructured"):
+            assert(get_global_context_value('facedir_s') == get_global_context_value('facedir_n') or
+                   get_global_context_value('integral_type') == 'exterior_facet')
+        if get_option("grid_unstructured"):
+            # For unstructured grids the amount of quadrature points could be different for
+            # self and neighbor. For now assert that this case is not happining.
+            assert len(set(qps_per_dir)) == 1
         qps_per_dir = qps_per_dir[:facedir] + qps_per_dir[facedir + 1:]
     return qps_per_dir
 
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index cbede6c30fa7d6754b58b57b662ba126a1dec6b5..9e14391c5aef7656dfcc6e0fe8276ffcf3016505 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -464,7 +464,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
             if isinstance(i, int) and isinstance(j, int) and i != j:
                 return 0
 
-        return self.interface.pymbolic_jacobian_inverse_transposed(i, j, restriction)
+        return self.interface.pymbolic_jacobian_inverse(i, j, restriction)
 
     def jacobian(self, o):
         raise NotImplementedError("How did you get Jacobian into your form? We only support JacobianInverse right now. Report!")
diff --git a/python/setup.py b/python/setup.py
index f193748a5e814895f23fee033c74c850aaf39573..476a9f6f7b5399c04837af8eb5e627f4182156f2 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -30,6 +30,7 @@ setup(name='dune.perftool',
       author='Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>',
       url='http://conan2.iwr.uni-heidelberg.de/git/dominic/dune-perftool',
       packages=['dune.perftool',
+                'dune.perftool.blockstructured',
                 'dune.perftool.cgen',
                 'dune.perftool.generation',
                 'dune.perftool.loopy',
diff --git a/test/sumfact/poisson/CMakeLists.txt b/test/sumfact/poisson/CMakeLists.txt
index 824f2721f163a46d6a76ac9c256c95b3f9885c4c..260a5f53dc643548433aa6ce8c32a83cd3f0bd61 100644
--- a/test/sumfact/poisson/CMakeLists.txt
+++ b/test/sumfact/poisson/CMakeLists.txt
@@ -34,6 +34,34 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_3d.ufl
                                   INIFILE poisson_dg_3d.mini
                                   )
 
+
+#===============================
+# Poisson on 'unstructured grid'
+#===============================
+dune_add_formcompiler_system_test(UFLFILE poisson_2d.ufl
+                                  BASENAME sumfact_poisson_2d_unstructured
+                                  INIFILE poisson_2d_unstructured.mini
+                                  )
+dune_add_formcompiler_system_test(UFLFILE poisson_3d.ufl
+                                  BASENAME sumfact_poisson_3d_unstructured
+                                  INIFILE poisson_3d_unstructured.mini
+                                  )
+
+#==================================
+# Poisson DG on 'unstructured grid'
+#==================================
+dune_add_formcompiler_system_test(UFLFILE poisson_dg_2d.ufl
+                                  BASENAME sumfact_poisson_dg_2d_unstructured
+                                  INIFILE poisson_dg_2d_unstructured.mini
+                                  ANALYZE_GRID
+                                  )
+dune_add_formcompiler_system_test(UFLFILE poisson_dg_3d.ufl
+                                  BASENAME sumfact_poisson_dg_3d_unstructured
+                                  INIFILE poisson_dg_3d_unstructured.mini
+                                  ANALYZE_GRID
+                                  )
+
+
 #=============================================
 # Poisson DG using FastDGGridOperator in 2D/3D
 #=============================================
@@ -49,10 +77,10 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_3d.ufl
 #================================
 # 'Poisson' DG with a full tensor
 #================================
-dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl
-                                  BASENAME sumfact_poisson_dg_tensor
-                                  INIFILE poisson_dg_tensor.mini
-                                  )
+# dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl
+#                                   BASENAME sumfact_poisson_dg_tensor
+#                                   INIFILE poisson_dg_tensor.mini
+#                                   )
 
 # Slicing vectorization stratgies
 
diff --git a/test/sumfact/poisson/poisson_2d_unstructured.mini b/test/sumfact/poisson/poisson_2d_unstructured.mini
new file mode 100644
index 0000000000000000000000000000000000000000..b797809b2f626a3d8e9a0aa546df4e549f12e3fe
--- /dev/null
+++ b/test/sumfact/poisson/poisson_2d_unstructured.mini
@@ -0,0 +1,34 @@
+__name = sumfact_poisson_2d_unstructured_{__exec_suffix}
+__exec_suffix = {deg_suffix}_{diff_suffix}_{quadvec_suffix}_{gradvec_suffix}
+
+diff_suffix = numdiff, symdiff | expand num
+quadvec_suffix = quadvec, nonquadvec | expand quad
+gradvec_suffix = gradvec, nongradvec | expand grad
+deg_suffix = deg{formcompiler.ufl_variants.degree}
+
+{deg_suffix} == deg1 | exclude
+{diff_suffix} == numdiff | exclude
+{quadvec_suffix} == quadvec | exclude
+{gradvec_suffix} == gradvec | exclude
+
+lowerleft = 0.0 0.0
+upperright = 1.0 1.0
+elements = 16 16
+elementType = quadrilateral
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+compare_l2errorsquared = 5e-5, 5e-7 | expand deg
+grid_unstructured = 1
+
+[formcompiler.r]
+numerical_jacobian = 1, 0 | expand num
+sumfact = 1
+vectorization_quadloop = 1, 0 | expand quad
+vectorization_strategy = explicit, none | expand grad
+
+[formcompiler.ufl_variants]
+degree = 1, 2 | expand deg
diff --git a/test/sumfact/poisson/poisson_3d_unstructured.mini b/test/sumfact/poisson/poisson_3d_unstructured.mini
new file mode 100644
index 0000000000000000000000000000000000000000..05f9a1d10e7068c6bb7f8c3c1742ed97f9ee86ab
--- /dev/null
+++ b/test/sumfact/poisson/poisson_3d_unstructured.mini
@@ -0,0 +1,34 @@
+__name = sumfact_poisson_3d_unstructured_{__exec_suffix}
+__exec_suffix = {deg_suffix}_{diff_suffix}_{quadvec_suffix}_{gradvec_suffix}
+
+diff_suffix = numdiff, symdiff | expand num
+quadvec_suffix = quadvec, nonquadvec | expand quad
+gradvec_suffix = gradvec, nongradvec | expand grad
+deg_suffix = deg{formcompiler.ufl_variants.degree}
+
+{deg_suffix} == deg1 | exclude
+{diff_suffix} == numdiff | exclude
+{quadvec_suffix} == quadvec | exclude
+{gradvec_suffix} == gradvec | exclude
+
+lowerleft = 0.0 0.0 0.0
+upperright = 1.0 1.0 1.0
+elements = 8 8 8
+elementType = quadrilateral
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+compare_l2errorsquared = 1e-4, 1e-8 | expand deg
+grid_unstructured = 1
+
+[formcompiler.r]
+numerical_jacobian = 1, 0 | expand num
+sumfact = 1
+vectorization_quadloop = 1, 0 | expand quad
+vectorization_strategy = explicit, none | expand grad
+
+[formcompiler.ufl_variants]
+degree = 1, 2 | expand deg
diff --git a/test/sumfact/poisson/poisson_dg_2d_unstructured.mini b/test/sumfact/poisson/poisson_dg_2d_unstructured.mini
new file mode 100644
index 0000000000000000000000000000000000000000..639b96da2988d7f50f4fbb4b1569f46997d21d31
--- /dev/null
+++ b/test/sumfact/poisson/poisson_dg_2d_unstructured.mini
@@ -0,0 +1,34 @@
+__name = sumfact_poisson_dg_2d_unstructured_{__exec_suffix}
+__exec_suffix = {deg_suffix}_{diff_suffix}_{quadvec_suffix}_{gradvec_suffix}
+
+diff_suffix = numdiff, symdiff | expand num
+quadvec_suffix = quadvec, nonquadvec | expand quad
+gradvec_suffix = gradvec, nongradvec | expand grad
+deg_suffix = deg{formcompiler.ufl_variants.degree}
+
+{deg_suffix} == deg1 | exclude
+{diff_suffix} == numdiff | exclude
+{quadvec_suffix} == quadvec | exclude
+{gradvec_suffix} == gradvec | exclude
+
+lowerleft = 0.0 0.0
+upperright = 1.0 1.0
+elements = 16 16
+elementType = quadrilateral
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+compare_l2errorsquared = 5e-5, 5e-5 | expand deg
+grid_unstructured = 1
+
+[formcompiler.r]
+numerical_jacobian = 1, 0 | expand num
+sumfact = 1
+vectorization_quadloop = 1, 0 | expand quad
+vectorization_strategy = explicit, none | expand grad
+
+[formcompiler.ufl_variants]
+degree = 1, 2 | expand deg
diff --git a/test/sumfact/poisson/poisson_dg_3d_unstructured.mini b/test/sumfact/poisson/poisson_dg_3d_unstructured.mini
new file mode 100644
index 0000000000000000000000000000000000000000..012b3356b887eedc31a848b714790d2ac633445b
--- /dev/null
+++ b/test/sumfact/poisson/poisson_dg_3d_unstructured.mini
@@ -0,0 +1,35 @@
+__name = sumfact_poisson_dg_3d_unstructured_{__exec_suffix}
+__exec_suffix = {deg_suffix}_{diff_suffix}_{quadvec_suffix}_{gradvec_suffix}
+
+diff_suffix = numdiff, symdiff | expand num
+quadvec_suffix = quadvec, nonquadvec | expand quad
+gradvec_suffix = gradvec, nongradvec | expand grad
+deg_suffix = deg{formcompiler.ufl_variants.degree}
+
+{deg_suffix} == deg1 | exclude
+{diff_suffix} == numdiff | exclude
+{quadvec_suffix} == quadvec | exclude
+{gradvec_suffix} == gradvec | exclude
+
+lowerleft = 0.0 0.0 0.0
+upperright = 1.0 1.0 1.0
+elements = 8 8 8
+elementType = quadrilateral
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+compare_l2errorsquared = 1e-4, 5e-6 | expand deg
+grid_unstructured = 1
+
+[formcompiler.r]
+numerical_jacobian = 1, 0 | expand num
+sumfact = 1
+sumfact_regular_jacobians = 1
+vectorization_quadloop = 1, 0 | expand quad
+vectorization_strategy = explicit, none | expand grad
+
+[formcompiler.ufl_variants]
+degree = 1, 2 | expand deg
diff --git a/test/sumfact/poisson/poisson_dg_tensor.ufl b/test/sumfact/poisson/poisson_dg_tensor.ufl
index 2734383f71d3afddc39c6c72f41ba5545def3b54..50af4173dd5f1a6a5c744d31f7cbe0f74befe3c5 100644
--- a/test/sumfact/poisson/poisson_dg_tensor.ufl
+++ b/test/sumfact/poisson/poisson_dg_tensor.ufl
@@ -25,15 +25,15 @@ gamma_int = (alpha * degree * (degree + dim - 1)) / h_int
 # SIPG: -1.0, IIPG: 0.0, NIPG: 1.0
 theta = 1.0
 
-r = inner(grad(u), grad(v))*dx \
-  - f*v*dx \
-  - inner(n, avg(grad(u)))*jump(v)*dS \
+r = inner(A*grad(u), grad(v))*dx \
+  - (c*u-f)*v*dx \
+  - inner(n, A*avg(grad(u)))*jump(v)*dS \
   + gamma_int*jump(u)*jump(v)*dS \
-  + theta*jump(u)*inner(avg(grad(v)), n)*dS \
-  - inner(n, grad(u))*v*ds \
+  + theta*jump(u)*inner(A*avg(grad(v)), n)*dS \
+  - inner(n, A*grad(u))*v*ds \
   + gamma_ext*u*v*ds \
-  + theta*u*inner(grad(v), n)*ds \
+  + theta*u*inner(A*grad(v), n)*ds \
   - gamma_ext*g*v*ds \
-  - theta*g*inner(grad(v), n)*ds
+  - theta*g*inner(A*grad(v), n)*ds
 
 exact_solution = g