diff --git a/cmake/modules/GeneratedSystemtests.cmake b/cmake/modules/GeneratedSystemtests.cmake
index de9dc259ffd2442bde0c522e2883e79b461ba621..d42b8aebf18ec27890e5bbdaf995a642e642d81c 100644
--- a/cmake/modules/GeneratedSystemtests.cmake
+++ b/cmake/modules/GeneratedSystemtests.cmake
@@ -67,10 +67,15 @@ function(dune_add_formcompiler_system_test)
 
       _add_test(NAME ${tname}
                 COMMAND ${CMAKE_BINARY_DIR}/run-in-dune-env ${SYSTEMTEST_SCRIPT}
-                      --exec ${tname}
-                      --ini "${CMAKE_CURRENT_BINARY_DIR}/${inifile}"
-                      --source ${CMAKE_CURRENT_SOURCE_DIR}
-                )
+                --exec ${tname}
+                --ini "${CMAKE_CURRENT_BINARY_DIR}/${inifile}"
+                --source ${CMAKE_CURRENT_SOURCE_DIR}
+                --mpi-exec "${MPIEXEC}"
+                --mpi-numprocflag=${MPIEXEC_NUMPROC_FLAG}
+                --mpi-preflags "${MPIEXEC_PREFLAGS}"
+                --mpi-postflags "${MPIEXEC_POSTFLAGS}"
+                --max-processors=${DUNE_MAX_TEST_CORES}
+               )
 
       set_tests_properties(${tname} PROPERTIES SKIP_RETURN_CODE 77)
       set_tests_properties(${tname} PROPERTIES TIMEOUT 60)
diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py
index 101ed2a7e827f8369ed72df5827dd6a7f376b367..694d7f1292630f5bf6f451cc5497746c5599a4b9 100644
--- a/python/dune/perftool/compile.py
+++ b/python/dune/perftool/compile.py
@@ -16,7 +16,9 @@ from ufl.algorithms.formfiles import interpret_ufl_namespace
 from dune.perftool.generation import (delete_cache_items,
                                       global_context,
                                       )
-from dune.perftool.options import get_option, initialize_options
+from dune.perftool.options import (get_option,
+                                   initialize_options,
+                                   )
 from dune.perftool.pdelab.driver import generate_driver
 from dune.perftool.pdelab.localoperator import (generate_localoperator_basefile,
                                                 generate_localoperator_file,
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index be2efcff1b534c45d70ad264e1512a9342eaef97..adf7e8ce483fd36bdaa44a90fa8b5f54bb25e442 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -42,6 +42,8 @@ class PerftoolOptionsArray(ImmutableRecord):
     explicit_time_stepping = PerftoolOption(default=False, helpstr="use explicit time stepping")
     exact_solution_expression = PerftoolOption(helpstr="name of the exact solution expression in the ufl file")
     compare_l2errorsquared = PerftoolOption(helpstr="maximal allowed l2 error squared of difference between numerical solution and interpolation of exact solution (NOTE: requires --exact-solution-expression)")
+    l2error_tree_path = PerftoolOption(default=None, helpstr="Tree pathes that should be considered for l2 error calculation. Default None means we take all of them into account.")
+    interactive = PerftoolOption(default=False, helpstr="whether the optimization process should be guided interactively (also useful for debugging)")
     print_transformations = PerftoolOption(default=False, helpstr="print out dot files after ufl tree transformations")
     print_transformations_dir = PerftoolOption(default=".", helpstr="place where to put dot files (can be omitted)")
     quadrature_order = PerftoolOption(_type=int, helpstr="Quadrature order used for all integrals.")
@@ -75,7 +77,8 @@ class PerftoolOptionsArray(ImmutableRecord):
     precompute_quadrature_info = PerftoolOption(default=True, helpstr="compute quadrature points and weights in the constructor of the local operator")
     blockstructured = PerftoolOption(default=False, helpstr="Use block structure")
     number_of_blocks = PerftoolOption(default=1, helpstr="Number of sub blocks in one direction")
-
+    overlapping = PerftoolOption(default=False, helpstr="Use an overlapping solver and constraints. You still need to make sure to construct a grid with overlap! The parallel option will be set automatically.")
+    parallel = PerftoolOption(default=False, helpstr="Mark that this program should be run in parallel. If set to true the c++ code will check that there are more than 1 MPI-ranks involved and the error computation will use communication.")
 
 # Until more sophisticated logic is needed, we keep the actual option data in this module
 _options = PerftoolOptionsArray()
@@ -129,6 +132,9 @@ def process_options(opt):
     if opt.numerical_jacobian:
         opt = opt.copy(generate_jacobians=False)
 
+    if opt.overlapping:
+        opt = opt.copy(parallel=True)
+
     return opt
 
 
diff --git a/python/dune/perftool/pdelab/driver/__init__.py b/python/dune/perftool/pdelab/driver/__init__.py
index 049127442d4a5b4a6c4a07325312516feac2cebd..14d0bd6ccee946ce78cf311685b0ac44d685edfd 100644
--- a/python/dune/perftool/pdelab/driver/__init__.py
+++ b/python/dune/perftool/pdelab/driver/__init__.py
@@ -251,10 +251,24 @@ def name_mpihelper():
     return name
 
 
+@preamble
+def check_parallel_execution():
+    from dune.perftool.pdelab.driver.gridfunctionspace import name_leafview
+    gv = name_leafview()
+    return ["if ({}.comm().size()==1){{".format(gv),
+            '  std::cout << "This program should be run in parallel!"  << std::endl;',
+            "  return 1;",
+            "}"]
+
+
 def generate_driver(formdatas, data):
     # The driver module uses a global dictionary for storing necessary data
     set_driver_data(formdatas, data)
 
+    # Add check to c++ file if this program should only be used in parallel mode
+    if get_option("parallel"):
+        check_parallel_execution()
+
     # Entrypoint for driver generation
     if get_option("opcounter") or get_option("time_opcounter"):
         if get_option("time_opcounter"):
diff --git a/python/dune/perftool/pdelab/driver/constraints.py b/python/dune/perftool/pdelab/driver/constraints.py
index 9158fc6be52d5c95c976eb08041fc3ad5b4a5da6..901133c853819c45f7cae6af21ba212346d4adf2 100644
--- a/python/dune/perftool/pdelab/driver/constraints.py
+++ b/python/dune/perftool/pdelab/driver/constraints.py
@@ -24,14 +24,26 @@ def name_assembled_constraints():
     return name
 
 
+def has_dirichlet_constraints(is_dirichlet):
+    if isinstance(is_dirichlet, (list, tuple)):
+        return any(bool(d) for d in is_dirichlet)
+    else:
+        return bool(is_dirichlet)
+
+
 @preamble
 def assemble_constraints(name):
     element = get_trial_element()
     gfs = name_trial_gfs()
     is_dirichlet = preprocess_leaf_data(element, "is_dirichlet")
-    bctype_function = name_bctype_function(element, is_dirichlet)
-    return "Dune::PDELab::constraints({}, {}, {});".format(bctype_function,
-                                                           gfs,
+    if has_dirichlet_constraints(is_dirichlet):
+        bctype_function = name_bctype_function(element, is_dirichlet)
+        return "Dune::PDELab::constraints({}, {}, {});".format(bctype_function,
+                                                               gfs,
+                                                               name,
+                                                               )
+    else:
+        return "Dune::PDELab::constraints({}, {});".format(gfs,
                                                            name,
                                                            )
 
diff --git a/python/dune/perftool/pdelab/driver/error.py b/python/dune/perftool/pdelab/driver/error.py
index 6e7fb9128e5437650baf140191311ee4a02eb5f7..b5b616627c111f8eeaef6b29e31c307fc7a40c14 100644
--- a/python/dune/perftool/pdelab/driver/error.py
+++ b/python/dune/perftool/pdelab/driver/error.py
@@ -9,7 +9,8 @@ from dune.perftool.pdelab.driver import (get_formdata,
                                          get_trial_element,
                                          preprocess_leaf_data,
                                          )
-from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs,
+from dune.perftool.pdelab.driver.gridfunctionspace import (name_leafview,
+                                                           name_trial_gfs,
                                                            name_trial_subgfs,
                                                            type_range,
                                                            )
@@ -104,14 +105,21 @@ def _accumulate_L2_squared(treepath):
 
     strtp = ", ".join(str(t) for t in treepath)
 
+    gv = name_leafview()
+    sum_error_over_ranks = ""
+    if get_option("parallel"):
+        sum_error_over_ranks = "  err = {}.comm().sum(err);".format(gv)
     return ["{",
             "  // L2 error squared of difference between numerical",
             "  // solution and the interpolation of exact solution",
             "  // for treepath ({})".format(strtp),
             "  typename decltype({})::Traits::RangeType err(0.0);".format(dsa),
             "  Dune::PDELab::integrateGridFunction({}, err, 10);".format(dsa),
+            sum_error_over_ranks,
             "  {} += err;".format(accum_error),
-            "  std::cout << \"L2 Error for treepath {}: \" << err << std::endl;".format(strtp),
+            "  if ({}.comm().rank() == 0){{".format(gv),
+            "    std::cout << \"L2 Error for treepath {}: \" << err << std::endl;".format(strtp),
+            "  }"
             "}",
             ]
 
@@ -139,8 +147,13 @@ def treepath_to_index(element, treepath, offset=0):
 def accumulate_L2_squared():
     element = get_trial_element()
     if isinstance(element, MixedElement):
-        for i in range(element.value_size()):
-            _accumulate_L2_squared(get_treepath(element, i))
+        tree_pathes = (True,) * element.value_size()
+        if get_option("l2error_tree_path") is not None:
+            tree_pathes = list(map(int, get_option("l2error_tree_path").split(',')))
+            assert len(tree_pathes) == element.value_size()
+        for i, path in enumerate(tree_pathes):
+            if path:
+                _accumulate_L2_squared(get_treepath(element, i))
     else:
         _accumulate_L2_squared(())
 
@@ -160,12 +173,15 @@ def name_accumulated_L2_error():
 @preamble
 def compare_L2_squared():
     accumulate_L2_squared()
+    gv = name_leafview()
 
     accum_error = name_accumulated_L2_error()
     fail = name_test_fail_variable()
     return ["using std::abs;",
             "using std::isnan;",
-            "std::cout << \"\\nl2errorsquared: \" << {} << std::endl << std::endl;".format(accum_error),
+            "if ({}.comm().rank() == 0){{".format(gv),
+            "  std::cout << \"\\nl2errorsquared: \" << {} << std::endl << std::endl;".format(accum_error),
+            "}",
             "if (isnan({0}) or abs({0})>{1})".format(accum_error, get_option("compare_l2errorsquared")),
             "  {} = true;".format(fail)]
 
diff --git a/python/dune/perftool/pdelab/driver/gridfunctionspace.py b/python/dune/perftool/pdelab/driver/gridfunctionspace.py
index f4392e367147b46774fffa84c63a98e0bf8bbed1..c611f264f397551ed1dbad89cd4a18e9569a02fa 100644
--- a/python/dune/perftool/pdelab/driver/gridfunctionspace.py
+++ b/python/dune/perftool/pdelab/driver/gridfunctionspace.py
@@ -367,6 +367,18 @@ def type_orderingtag(leaf):
         return "Dune::PDELab::EntityBlockedOrderingTag"
 
 
+@preamble
+def typedef_overlapping_dirichlet_constraintsassembler(name):
+    include_file("dune/pdelab/constraints/conforming.hh", filetag="driver")
+    return "using {} = Dune::PDELab::ConformingDirichletConstraints;".format(name)
+
+
+@preamble
+def typedef_p0parallel_constraintsassembler(name):
+    include_file("dune/pdelab/constraints/p0.hh", filetag="driver")
+    return "using {} = Dune::PDELab::P0ParallelConstraints;".format(name)
+
+
 @preamble
 def typedef_dirichlet_constraintsassembler(name):
     include_file("dune/pdelab/constraints/conforming.hh", filetag="driver")
@@ -380,14 +392,21 @@ def typedef_no_constraintsassembler(name):
 
 def type_constraintsassembler(is_dirichlet):
     assert isinstance(is_dirichlet, bool)
-    if is_dirichlet:
+    overlapping = get_option("overlapping")
+    if is_dirichlet and not overlapping:
         name = "DirichletConstraintsAssember"
         typedef_dirichlet_constraintsassembler(name)
-        return name
+    elif is_dirichlet and overlapping:
+        name = "OverlappingConformingDirichletConstraints"
+        typedef_overlapping_dirichlet_constraintsassembler(name)
+    elif not is_dirichlet and overlapping:
+        name = "P0ParallelConstraints"
+        typedef_p0parallel_constraintsassembler(name)
     else:
+        assert not is_dirichlet and not overlapping
         name = "NoConstraintsAssembler"
         typedef_no_constraintsassembler(name)
-        return name
+    return name
 
 
 def name_trial_subgfs(treepath):
diff --git a/python/dune/perftool/pdelab/driver/gridoperator.py b/python/dune/perftool/pdelab/driver/gridoperator.py
index 4727f4f281ed6e8c6205cf5613fc778b07c476ac..d0244c107240fb5175329cac47ed145ffbdf02fb 100644
--- a/python/dune/perftool/pdelab/driver/gridoperator.py
+++ b/python/dune/perftool/pdelab/driver/gridoperator.py
@@ -170,7 +170,8 @@ def define_parameters(name, formdata):
     return "{} {};".format(partype, name)
 
 
-def name_parameters(formdata):
+def name_parameters(formdata, define=True):
     name = form_name_suffix("params", formdata)
-    define_parameters(name, formdata)
+    if define:
+        define_parameters(name, formdata)
     return name
diff --git a/python/dune/perftool/pdelab/driver/instationary.py b/python/dune/perftool/pdelab/driver/instationary.py
index 5c9bf3f6be64b6d2f0bd0037ef680ae528a456b7..3ed2635b91aeb5e3c1af777eea4cbbd2aa0f76aa 100644
--- a/python/dune/perftool/pdelab/driver/instationary.py
+++ b/python/dune/perftool/pdelab/driver/instationary.py
@@ -14,7 +14,8 @@ from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs,
 from dune.perftool.pdelab.driver.gridoperator import (name_gridoperator,
                                                       name_parameters,
                                                       type_gridoperator,)
-from dune.perftool.pdelab.driver.constraints import (name_bctype_function,
+from dune.perftool.pdelab.driver.constraints import (has_dirichlet_constraints,
+                                                     name_bctype_function,
                                                      name_constraintscontainer,
                                                      )
 from dune.perftool.pdelab.driver.interpolate import (interpolate_dirichlet_data,
@@ -53,24 +54,35 @@ def time_loop():
     params = name_parameters(formdata)
     time = name_time()
     element = get_trial_element()
-    is_dirichlet = preprocess_leaf_data(element, "is_dirichlet")
-    bctype = name_bctype_function(element, is_dirichlet)
-    gfs = name_trial_gfs()
-    cc = name_constraintscontainer()
     vector_type = type_vector(formdata)
     vector = name_vector(formdata)
     interpolate_dirichlet_data(vector)
 
+    is_dirichlet = preprocess_leaf_data(element, "is_dirichlet")
+    assemble_new_constraints = ""
+    if has_dirichlet_constraints(is_dirichlet):
+        bctype = name_bctype_function(element, is_dirichlet)
+        gfs = name_trial_gfs()
+        cc = name_constraintscontainer()
+        assemble_new_constraints = ("  // Assemble constraints for new time step\n"
+                                    "  {}.setTime({}+dt);\n"
+                                    "  Dune::PDELab::constraints({}, {}, {});\n"
+                                    "\n".format(params, time, bctype, gfs, cc)
+                                    )
+
     # Choose between explicit and implicit time stepping
     explicit = get_option('explicit_time_stepping')
     if explicit:
         osm = name_explicitonestepmethod()
         apply_call = "{}.apply(time, dt, {}, {}new);".format(osm, vector, vector)
     else:
+        osm = name_onestepmethod()
+    if has_dirichlet_constraints(is_dirichlet):
         dirichlet = preprocess_leaf_data(element, "interpolate_expression")
         boundary = name_boundary_function(element, dirichlet)
-        osm = name_onestepmethod()
         apply_call = "{}.apply(time, dt, {}, {}, {}new);".format(osm, vector, boundary, vector)
+    else:
+        apply_call = "{}.apply(time, dt, {}, {}new);".format(osm, vector, vector)
 
     # Setup visualization
     visualize_initial_condition()
@@ -82,10 +94,7 @@ def time_loop():
             "int step_number(0);"
             "int output_every_nth = {}.get<int>(\"instat.output_every_nth\", 1);".format(ini),
             "while (time<T-1e-8){",
-            "  // Assemble constraints for new time step",
-            "  {}.setTime({}+dt);".format(params, time),
-            "  Dune::PDELab::constraints({}, {}, {});".format(bctype, gfs, cc),
-            "",
+            "{}".format(assemble_new_constraints),
             "  // Do time step",
             "  {} {}new({});".format(vector_type, vector, vector),
             "  {}".format(apply_call),
diff --git a/python/dune/perftool/pdelab/driver/solve.py b/python/dune/perftool/pdelab/driver/solve.py
index e877ece672cf79f7db0ed373e96257d9fbfb98a7..9b4cad9154f0de3a968c19a500905f849f23628b 100644
--- a/python/dune/perftool/pdelab/driver/solve.py
+++ b/python/dune/perftool/pdelab/driver/solve.py
@@ -7,7 +7,12 @@ from dune.perftool.pdelab.driver import (form_name_suffix,
                                          is_linear,
                                          name_initree,
                                          )
-from dune.perftool.pdelab.driver.gridfunctionspace import name_trial_gfs
+from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs,
+                                                           type_trial_gfs,
+                                                           )
+from dune.perftool.pdelab.driver.constraints import (type_constraintscontainer,
+                                                     name_assembled_constraints,
+                                                     )
 from dune.perftool.pdelab.driver.gridoperator import (name_gridoperator,
                                                       type_gridoperator,
                                                       )
@@ -104,7 +109,12 @@ def define_vector(name, formdata):
 @preamble
 def typedef_linearsolver(name):
     include_file("dune/pdelab/backend/istl.hh", filetag="driver")
-    return "using {} = Dune::PDELab::ISTLBackend_SEQ_SuperLU;".format(name)
+    if get_option('overlapping'):
+        gfs = type_trial_gfs()
+        cc = type_constraintscontainer()
+        return "using {} = Dune::PDELab::ISTLBackend_OVLP_BCGS_ILU0<{},{}>;".format(name, gfs, cc)
+    else:
+        return "using {} = Dune::PDELab::ISTLBackend_SEQ_SuperLU;".format(name)
 
 
 def type_linearsolver():
@@ -116,7 +126,12 @@ def type_linearsolver():
 @preamble
 def define_linearsolver(name):
     lstype = type_linearsolver()
-    return "{} {}(false);".format(lstype, name)
+    if get_option('overlapping'):
+        gfs = name_trial_gfs()
+        cc = name_assembled_constraints()
+        return "{} {}({}, {});".format(lstype, name, gfs, cc)
+    else:
+        return "{} {}(false);".format(lstype, name)
 
 
 def name_linearsolver():
diff --git a/python/dune/perftool/pdelab/parameter.py b/python/dune/perftool/pdelab/parameter.py
index 63d47bf090ea69a4e14663f0ff94e2b42b5fa981..36e5cd228fec281a27e17c99fa4006ee95d9e27e 100644
--- a/python/dune/perftool/pdelab/parameter.py
+++ b/python/dune/perftool/pdelab/parameter.py
@@ -42,7 +42,7 @@ def define_parameterclass(name):
 def name_paramclass():
     formdata = get_global_context_value("formdata")
     from dune.perftool.pdelab.driver.gridoperator import name_parameters
-    name = name_parameters(formdata)
+    name = name_parameters(formdata, define=False)
     define_parameterclass(name)
     return name
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index d0e1df820ce14b12a2bada056ae67e8bce81c318..3dc0eec4dce83ee6eaf25d85711442cb566c3c6d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,10 +1,11 @@
 add_subdirectory(hyperbolic)
 add_subdirectory(heatequation)
 add_subdirectory(laplace)
+add_subdirectory(navier-stokes)
 add_subdirectory(nonlinear)
 add_subdirectory(poisson)
 add_subdirectory(stokes)
 
 add_subdirectory(sumfact)
 
-add_subdirectory(blockstructured)
\ No newline at end of file
+add_subdirectory(blockstructured)
diff --git a/test/navier-stokes/CMakeLists.txt b/test/navier-stokes/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b87827a05984dbdded01684d7b328d80d48895b1
--- /dev/null
+++ b/test/navier-stokes/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_subdirectory(reference_program)
+
+dune_add_formcompiler_system_test(UFLFILE navierstokes_2d_dg_quadrilateral.ufl
+                                  BASENAME navierstokes_2d_dg_quadrilateral
+                                  INIFILE navierstokes_2d_dg_quadrilateral.mini
+                                  SCRIPT dune_execute_parallel.py
+                                  )
+
+dune_add_formcompiler_system_test(UFLFILE navierstokes_3d_dg_quadrilateral.ufl
+                                  BASENAME navierstokes_3d_dg_quadrilateral
+                                  INIFILE navierstokes_3d_dg_quadrilateral.mini
+                                  )
diff --git a/test/navier-stokes/navierstokes_2d_dg_quadrilateral.mini b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.mini
new file mode 100644
index 0000000000000000000000000000000000000000..e8132ebb4ebefbf2d7eee39039c3c89959ea0aab
--- /dev/null
+++ b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.mini
@@ -0,0 +1,33 @@
+__name = navierstokes_2d_dg_quadrilateral_{__exec_suffix}
+__exec_suffix = symdiff, numdiff | expand num
+
+cells = 16 16
+lowerleft = -1. -1.
+extension = 2. 2.
+periodic = true true
+
+printmatrix = false
+
+[wrapper.execute_parallel]
+numprocessors = 4
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+numerical_jacobian = 0, 1 | expand num
+compare_l2errorsquared = 5e-5
+# Only calculate error for the velocity part
+l2error_tree_path = 1, 1, 0
+explicit_time_stepping = 0
+grid_offset = 1
+overlapping = 1
+
+[instat]
+T = 1e-2
+dt = 1e-3
+nth = 1
+
+# Disable numdiff tests
+{__exec_suffix} == numdiff | exclude
diff --git a/test/navier-stokes/navierstokes_2d_dg_quadrilateral.ufl b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..fc8a150570cc80f648f985b154dac8efc7a3f68b
--- /dev/null
+++ b/test/navier-stokes/navierstokes_2d_dg_quadrilateral.ufl
@@ -0,0 +1,48 @@
+# Taylor-Green vortex
+
+cell = quadrilateral
+degree = 2
+dim = 2
+
+x = SpatialCoordinate(cell)
+time = get_time(cell)
+
+P2 = VectorElement("DG", cell, degree)
+P1 = FiniteElement("DG", cell, degree-1)
+TH = P2 * P1
+
+v, q = TestFunctions(TH)
+u, p = TrialFunctions(TH)
+
+n = FacetNormal(cell)('+')
+
+rho = 1.0
+mu = 1.0/100.0
+
+g_v = as_vector((-exp(-2*pi*mu/rho*time)*cos(pi*x[0])*sin(pi*x[1]),
+                 exp(-2*pi*mu/rho*time)*sin(pi*x[0])*cos(pi*x[1])))
+g_p = -0.25*rho*exp(-4*pi*pi*mu/rho*time)*(cos(2*pi*x[0])+cos(2*pi*x[1]))
+
+# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0
+theta = -1.0
+
+# penalty factor
+alpha = 1.0
+h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell)
+gamma_int = (alpha * degree * (degree + dim - 1)) / h_int
+
+mass = rho*inner(u,v)*dx
+
+r = mu * inner(grad(u), grad(v))*dx \
+  - p*div(v)*dx \
+  - q*div(u)*dx \
+  + rho * inner(grad(u)*u,v)*dx \
+  + mu * inner(avg(grad(u))*n, jump(v))*dS \
+  + mu * gamma_int * inner(jump(u), jump(v))*dS \
+  - mu * theta * inner(avg(grad(v))*n, jump(u))*dS \
+  - avg(p)*inner(jump(v), n)*dS \
+  - avg(q)*inner(jump(u), n)*dS \
+
+forms = [mass,r]
+interpolate_expression = g_v, g_p
+exact_solution = g_v, g_p
diff --git a/test/navier-stokes/navierstokes_3d_dg_quadrilateral.mini b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.mini
new file mode 100644
index 0000000000000000000000000000000000000000..1d6c6f5ddd3f64165f35398354b39c57882825ae
--- /dev/null
+++ b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.mini
@@ -0,0 +1,27 @@
+__name = navierstokes_3d_dg_quadrilateral_{__exec_suffix}
+__exec_suffix = symdiff, numdiff | expand num
+
+cells = 4 4 4
+lowerleft = -1. -1. -1.
+extension = 2. 2. 2.
+
+printmatrix = false
+
+[wrapper.vtkcompare]
+name = {__name}
+extension = vtu
+
+[formcompiler]
+numerical_jacobian = 0, 1 | expand num
+explicit_time_stepping = 0
+grid_offset = 1
+compare_l2errorsquared = 5e-4
+# Only calculate error for the velocity part
+l2error_tree_path = 1, 1, 1, 0
+
+[instat]
+T = 1e-1
+dt = 5e-2
+
+# Disable numdiff tests
+{__exec_suffix} == numdiff | exclude
diff --git a/test/navier-stokes/navierstokes_3d_dg_quadrilateral.ufl b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.ufl
new file mode 100644
index 0000000000000000000000000000000000000000..876821975e53f660c7abd81766b2a7f0080d97da
--- /dev/null
+++ b/test/navier-stokes/navierstokes_3d_dg_quadrilateral.ufl
@@ -0,0 +1,78 @@
+# Beltrami flow
+
+cell = hexahedron
+degree = 2
+dim = 2
+
+x = SpatialCoordinate(cell)
+time = get_time(cell)
+
+P2 = VectorElement("DG", cell, degree)
+P1 = FiniteElement("DG", cell, degree-1)
+TH = P2 * P1
+
+v, q = TestFunctions(TH)
+u, p = TrialFunctions(TH)
+
+# g_v = as_vector((4.*x[1]*(1.-x[1]), 0.0, 0.0))
+# bctype = conditional(x[0] < 1. - 1e-8, 1, 0)
+# ds = ds(subdomain_id=1, subdomain_data=bctype)
+
+n = FacetNormal(cell)('+')
+
+rho = 1.0
+mu = 1.0
+
+a = pi/4
+d = pi/2
+g_v = as_vector((-a*exp(-d*d*time)*(exp(a*x[0])*sin(d*x[2]+a*x[1])+cos(d*x[1]+a*x[0])*exp(a*x[2])),
+                 -a*exp(-d*d*time)*(exp(a*x[0])*cos(d*x[2]+a*x[1])+exp(a*x[1])*sin(a*x[2]+d*x[0])),
+                 -a*exp(-d*d*time)*(exp(a*x[1])*cos(a*x[2]+d*x[0])+sin(d*x[1]+a*x[0])*exp(a*x[2]))))
+g_p = -0.5*a*a*rho*exp(-d*d*time)*  (  2*cos(d*x[1]+a*x[0])*exp(a*(x[2]+x[0]) )*sin(d*x[2]+a*x[1])  +  2*exp(a*(x[1]+x[0]))*sin(a*x[2]+d*x[0])*cos(d*x[2]+a*x[1])  +  2*sin(d*x[1]+a*x[0])*exp(a*(x[2]+x[1]))*cos(a*x[2]+d*x[0])  +  exp(2*a*x[2])  +  exp(2*a*x[1])  +  exp(2*a*x[0])  )
+
+
+# SIPG: -1.0, IIPG: 0.0, NIPG: 1.0
+theta = -1.0
+
+# penalty factor
+alpha = 1.0
+h_ext = CellVolume(cell) / FacetArea(cell)
+gamma_ext = (alpha * degree * (degree + dim - 1)) / h_ext
+h_int = Min(CellVolume(cell)('+'), CellVolume(cell)('-')) / FacetArea(cell)
+gamma_int = (alpha * degree * (degree + dim - 1)) / h_int
+
+mass = rho*inner(u,v)*dx
+
+r = mu * inner(grad(u), grad(v))*dx \
+  - p*div(v)*dx \
+  - q*div(u)*dx \
+  + rho * inner(grad(u)*u,v)*dx \
+  + mu * inner(avg(grad(u))*n, jump(v))*dS \
+  + mu * gamma_int * inner(jump(u), jump(v))*dS \
+  - mu * theta * inner(avg(grad(v))*n, jump(u))*dS \
+  - avg(p)*inner(jump(v), n)*dS \
+  - avg(q)*inner(jump(u), n)*dS \
+  - mu * inner(grad(u)*n, v)*ds \
+  + mu * gamma_ext * inner(u-g_v, v)*ds \
+  + mu * theta * inner(grad(v)*n, u-g_v)*ds \
+  + p*inner(v, n)*ds \
+  + q*inner(u-g_v, n)*ds
+
+# r = mu * inner(grad(u), grad(v))*dx \
+#   - p*div(v)*dx \
+#   - q*div(u)*dx \
+#   + rho * inner(grad(u)*u,v)*dx \
+#   + mu * inner(avg(grad(u))*n, jump(v))*dS \
+#   + mu / h_e * sigma * inner(jump(u), jump(v))*dS \
+#   - mu * theta * inner(avg(grad(v))*n, jump(u))*dS \
+#   - avg(p)*inner(jump(v), n)*dS \
+#   - avg(q)*inner(jump(u), n)*dS \
+#   - mu * inner(grad(u)*n, v)*ds \
+#   + mu / h_e * sigma * inner(u-g_v, v)*ds \
+#   + mu * theta * inner(grad(v)*n, u-g_v)*ds \
+#   + p*inner(v, n)*ds \
+#   + q*inner(u-g_v, n)*ds
+
+forms = [mass,r]
+interpolate_expression = g_v, g_p
+exact_solution = g_v, g_p
\ No newline at end of file
diff --git a/test/navier-stokes/reference_program/CMakeLists.txt b/test/navier-stokes/reference_program/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..21af3e83f8d1fdec5377291b521f8712987a494d
--- /dev/null
+++ b/test/navier-stokes/reference_program/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(taylor-green taylor-green.cc)
+dune_symlink_to_source_files(FILES taylor-green.ini)
+set_target_properties(taylor-green PROPERTIES EXCLUDE_FROM_ALL 1)
diff --git a/test/navier-stokes/reference_program/taylor-green.cc b/test/navier-stokes/reference_program/taylor-green.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e48e04296122dd89b615166f33d21219324d0726
--- /dev/null
+++ b/test/navier-stokes/reference_program/taylor-green.cc
@@ -0,0 +1,310 @@
+// -*- tab-width: 2; indent-tabs-mode: nil -*-
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <iostream>
+#include <vector>
+#include <map>
+#include <string>
+#include <random>
+#include <dune/common/parallel/mpihelper.hh>
+#include <dune/common/exceptions.hh>
+#include <dune/common/fvector.hh>
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh>
+#include <dune/istl/bvector.hh>
+#include <dune/istl/operators.hh>
+#include <dune/istl/solvers.hh>
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/io.hh>
+
+#include <dune/pdelab/common/function.hh>
+#include <dune/pdelab/common/functionutilities.hh>
+#include <dune/pdelab/finiteelementmap/qkdg.hh>
+#include <dune/pdelab/gridfunctionspace/gridfunctionspaceutilities.hh>
+#include <dune/pdelab/gridfunctionspace/subspace.hh>
+#include <dune/pdelab/gridfunctionspace/vectorgridfunctionspace.hh>
+#include <dune/pdelab/gridfunctionspace/vtk.hh>
+#include <dune/pdelab/gridoperator/gridoperator.hh>
+#include <dune/pdelab/gridfunctionspace/interpolate.hh>
+#include <dune/pdelab/localoperator/dgnavierstokes.hh>
+#include <dune/pdelab/backend/istl.hh>
+#include <dune/pdelab/finiteelementmap/monomfem.hh>
+#include <dune/pdelab/common/function.hh>
+#include <dune/pdelab/common/vtkexport.hh>
+#include <dune/pdelab/constraints/p0.hh>
+#include<dune/pdelab/gridoperator/onestep.hh>
+#include<dune/pdelab/newton/newton.hh>
+#include "dune/perftool/vtkpredicate.hh"
+#include "dune/grid/io/file/vtk/vtksequencewriter.hh"
+
+#include "taylor-green.hh"
+
+
+#define PERIODIC
+// #define NORMALIZE_PRESSURE
+
+//===============================================================
+// Problem setup and solution
+//===============================================================
+template<typename GV, typename RF, int vOrder, int pOrder>
+void taylor_green (const GV& gv, const Dune::ParameterTree& configuration, std::string filename)
+{
+  // Some types
+  using ES = Dune::PDELab::AllEntitySet<GV>;
+  ES es(gv);
+  using DF = typename ES::Grid::ctype;
+  static const unsigned int dim = ES::dimension;
+  Dune::Timer timer;
+
+  // Create finite element maps
+  const int velocity_degree = 2;
+  const int pressure_degree = 1;
+  using FEM_V = Dune::PDELab::QkDGLocalFiniteElementMap<DF, RF, velocity_degree, dim>;
+  using FEM_P = Dune::PDELab::QkDGLocalFiniteElementMap<DF, RF, pressure_degree, dim>;
+  FEM_V fem_v;
+  FEM_P fem_p;
+
+  // Do not block anything and order it lexicographic
+  using VectorBackend_V = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none>;
+  using VectorBackend_P = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none>;
+  using VectorBackend = Dune::PDELab::istl::VectorBackend<Dune::PDELab::istl::Blocking::none>;
+  using OrderingTag_V = Dune::PDELab::LexicographicOrderingTag;
+
+  // For periodic boundary conditions in Yasp grid we need an
+  // overlap. Therefore we run our program in parallel and need these
+  // constraints
+#ifdef PERIODIC
+  using Con = Dune::PDELab::P0ParallelConstraints;
+#else
+  using Con = Dune::PDELab::NoConstraints;
+#endif
+
+  // Velocity GFS
+  using GFS_V = Dune::PDELab::VectorGridFunctionSpace<
+    ES,FEM_V,dim,
+    VectorBackend,
+    VectorBackend_V,
+    Con,
+    OrderingTag_V
+    >;
+  GFS_V gfs_v(es,fem_v);
+  gfs_v.name("v");
+
+  // Pressure GFS
+  using GFS_P = Dune::PDELab::GridFunctionSpace<
+    ES,
+    FEM_P,
+    Con,
+    VectorBackend_P>;
+  GFS_P gfs_p(es,fem_p);
+  gfs_p.name("p");
+
+
+  // GFS
+  using OrderingTag = Dune::PDELab::LexicographicOrderingTag;
+  using GFS = Dune::PDELab::CompositeGridFunctionSpace<VectorBackend,OrderingTag,GFS_V,GFS_P>;
+  GFS gfs(gfs_v, gfs_p);
+  using namespace Dune::Indices;
+  gfs_v.child(_0).name("velocity_0");
+  gfs_v.child(_1).name("velocity_1");
+  gfs_p.name("pressure");
+  gfs.name("test");
+  gfs.update();
+  using CC = typename GFS::template ConstraintsContainer<double>::Type;
+  CC cc;
+  cc.clear();
+#ifdef PERIODIC
+  Dune::PDELab::constraints(gfs,cc);
+#endif
+  std::cout << "gfs with " << gfs.size() << " dofs generated  "<< std::endl;
+  std::cout << "cc with " << cc.size() << " dofs generated  "<< std::endl;
+
+  // Parameter functions
+  using FType = ZeroVectorFunction<ES,RF,dim>;
+  FType f(es);
+  using BType = BCTypeParamGlobalDirichlet;
+  BType b;
+  using VType = TaylorGreenVelocity<ES,RF,dim>;
+  VType v(es, configuration.sub("parameters"));
+  using PType = TaylorGreenPressure<ES,RF>;
+  PType p(es, configuration.sub("parameters"));
+  using PenaltyTerm = Dune::PDELab::DefaultInteriorPenalty<RF>;
+
+  // Local operator
+  using LOP_Parameters =
+    Dune::PDELab::DGNavierStokesParameters<ES,RF,FType,BType,VType,PType,true,false,PenaltyTerm>;
+  LOP_Parameters lop_parameters(configuration.sub("parameters"),f,b,v,p);
+  using LOP = Dune::PDELab::DGNavierStokes<LOP_Parameters>;
+  const int superintegration_order = 0;
+  LOP lop(lop_parameters,superintegration_order);
+  using LOP_M = Dune::PDELab::NavierStokesMass<LOP_Parameters>;
+  LOP_M lop_m(lop_parameters,1);
+
+  // Grid operator
+  using MBE = Dune::PDELab::istl::BCRSMatrixBackend<>;
+  MBE mbe(75); // Maximal number of nonzeroes per row can be cross-checked by patternStatistics().
+  using GO_R = Dune::PDELab::GridOperator<GFS,GFS,LOP,MBE,RF,RF,RF,CC,CC>;
+  GO_R go_r(gfs,cc,gfs,cc,lop,mbe);
+  using GO_M = Dune::PDELab::GridOperator<GFS,GFS,LOP_M,MBE,RF,RF,RF,CC,CC>;
+  GO_M go_m(gfs,cc,gfs,cc,lop_m,mbe);
+  using IGO = Dune::PDELab::OneStepGridOperator<GO_R,GO_M>;
+  IGO igo(go_r,go_m);
+
+  // Create initial solution
+  using InitialVelocity = TaylorGreenVelocity<GV,RF,2>;
+  InitialVelocity initial_velocity(gv, configuration.sub("parameters"));
+  using InitialPressure = TaylorGreenPressure<GV,RF>;
+  InitialPressure initial_pressure(gv, configuration.sub("parameters"));
+  using InitialSolution = Dune::PDELab::CompositeGridFunction<InitialVelocity,InitialPressure>;
+  InitialSolution initial_solution(initial_velocity, initial_pressure);
+
+  // Make coefficent vector and initialize it from a function
+  using V = typename IGO::Traits::Domain;
+  V xold(gfs);
+  xold = 0.0;
+  Dune::PDELab::interpolate(initial_solution,gfs,xold);
+
+  // Solver
+#ifdef PERIODIC
+  using LinearSolver = Dune::PDELab::ISTLBackend_OVLP_BCGS_ILU0<GFS,CC>;
+  LinearSolver ls(gfs,cc);
+#else
+  using LinearSolver = Dune::PDELab::ISTLBackend_SEQ_BCGS_ILU0;
+  LinearSolver ls;
+  // using LinearSolver = Dune::PDELab::ISTLBackend_SEQ_UMFPack;
+  // LinearSolver ls(false);
+#endif
+  using PDESolver = Dune::PDELab::Newton<IGO,LinearSolver,V>;
+  PDESolver newton(igo,xold,ls);
+  // newton.setReassembleThreshold(0.0);
+  // newton.setVerbosityLevel(2);
+  // newton.setMaxIterations(50);
+  // newton.setLineSearchMaxIterations(30);
+
+  // Time stepping
+  // using TSM = Dune::PDELab::OneStepThetaParameter<RF>;
+  // TSM tsm(1.0);
+  using TSM = Dune::PDELab::Alexander2Parameter<RF>;
+  TSM tsm;
+  Dune::PDELab::OneStepMethod<RF,IGO,PDESolver,V,V> osm(tsm,igo,newton);
+  // osm.setVerbosityLevel(2);
+
+  // Set time
+  RF time = 0.0;
+  RF time_end = configuration.get<RF>("driver.time_end");
+  RF dt = configuration.get<RF>("driver.dt");
+  RF dt_min = 1e-8;
+
+  // Visualize initial condition
+  using VTKSW = Dune::VTKSequenceWriter<GV>;
+  using VTKWriter = Dune::SubsamplingVTKWriter<GV>;
+  VTKWriter vtkwriter(gv, 2);
+  VTKSW vtkSequenceWriter(std::make_shared<VTKWriter>(vtkwriter), filename);
+  CuttingPredicate predicate;
+  Dune::PDELab::addSolutionToVTKWriter(vtkSequenceWriter, gfs, xold, Dune::PDELab::vtk::defaultNameScheme(), predicate);
+  vtkSequenceWriter.write(time, Dune::VTK::appendedraw);
+
+  V x(gfs,0.0);
+
+#ifdef NORMALIZE_PRESSURE
+  // Pressure normalization
+  using PressureSubGFS = typename Dune::PDELab::GridFunctionSubSpace <GFS,Dune::TypeTree::TreePath<1> >;
+  PressureSubGFS pressureSubGfs(gfs);
+  using PDGF = Dune::PDELab::DiscreteGridFunction<PressureSubGFS,V>;
+  PDGF pdgf(pressureSubGfs,x);
+  typename PDGF::Traits::RangeType pressure_integral(0);
+
+  int elements = int(sqrt(gv.size(0)));
+  int pressure_index = elements * elements * dim * pow((velocity_degree + 1), dim);
+  using Dune::PDELab::Backend::native;
+  std::cout << std::endl;
+  std::cout << "info elements: " << elements << std::endl;
+  std::cout << "info pressure_index: " << pressure_index << std::endl;
+  std::cout << "info gfs.size(): " << gfs.size() << std::endl;
+  std::cout << "info native(x).size(): " << native(x).size() << std::endl;
+  std::cout << std::endl;
+#endif
+
+  // Time loop
+  int step = 0;
+  const int nth = configuration.get<RF>("driver.nth");
+  while (time < time_end - dt_min*0.5){
+    osm.apply(time,dt,xold,x);
+
+#ifdef NORMALIZE_PRESSURE
+    // Correct pressure after each step. Without this pressure
+    // correction the velocity will be ok but the pressure will be
+    // shifted by a constant.
+    Dune::PDELab::integrateGridFunction(pdgf,pressure_integral,2);
+    pressure_integral = gv.comm().sum(pressure_integral);
+    std::cout << gv.comm().rank() << " pressure_integral before normalization: " << pressure_integral << std::endl;
+
+    // Scale integral
+    pressure_integral = pressure_integral/4;
+    for (int i=pressure_index; i<gfs.size(); ++i){
+      native(x)[i] -= pressure_integral;
+    }
+    Dune::PDELab::integrateGridFunction(pdgf,pressure_integral,2);
+    pressure_integral = gv.comm().sum(pressure_integral);
+    std::cout << "pressure_integral after normalization: " << pressure_integral << std::endl;
+#endif
+
+    xold = x;
+    time += dt;
+    step++;
+
+    if(step%nth==0){
+      vtkSequenceWriter.write(time, Dune::VTK::appendedraw);
+    }
+  }
+}
+
+//===============================================================
+// Main program with grid setup
+//===============================================================
+int main(int argc, char** argv)
+{
+  try{
+    // Maybe initialize Mpi
+    Dune::MPIHelper::instance(argc, argv);
+
+    // Read ini file
+    Dune::ParameterTree configuration;
+    const std::string config_filename("taylor-green.ini");
+    std::cout << "Reading ini-file \""<< config_filename
+              << "\"" << std::endl;
+    Dune::ParameterTreeParser::readINITree(config_filename, configuration);
+
+    // Create grid
+    const int dim = 2;
+    const int cells_per_dir = configuration.get<double>("driver.cells_per_dir");
+    Dune::FieldVector<double,dim> lowerleft(-1.0);
+    Dune::FieldVector<double,dim> upperright(1.0);
+    std::array<int, dim> cells(Dune::fill_array<int, dim>(cells_per_dir));
+    std::bitset<dim> periodic(false);
+    int overlap = 0;
+#ifdef PERIODIC
+    periodic[0] = true;
+    periodic[1] = true;
+    overlap = 1;
+#endif
+    using Grid = Dune::YaspGrid<dim, Dune::EquidistantOffsetCoordinates<double, dim> >;
+    Grid grid(lowerleft, upperright, cells, periodic, overlap);
+
+    // Solve problem
+    using GV = Grid::LeafGridView;
+    const GV gv=grid.leafGridView();
+    Dune::dinfo.push(false);
+    taylor_green<GV,double,2,1>(gv,configuration,"taylor-green");
+    return 0;
+  }
+  catch (Dune::Exception &e){
+    std::cerr << "Dune reported error: " << e << std::endl;
+    return 1;
+  }
+  catch (...){
+    std::cerr << "Unknown exception thrown!" << std::endl;
+    return 1;
+  }
+}
diff --git a/test/navier-stokes/reference_program/taylor-green.hh b/test/navier-stokes/reference_program/taylor-green.hh
new file mode 100644
index 0000000000000000000000000000000000000000..fc30069f6507ac9e10f36c147fa9188ce8f86293
--- /dev/null
+++ b/test/navier-stokes/reference_program/taylor-green.hh
@@ -0,0 +1,149 @@
+#ifndef TAYLOR_GREEN_HH
+#define TAYLOR_GREEN_HH
+
+//===============================================================
+// Define parameter functions f,g,j and \partial\Omega_D/N
+//===============================================================
+
+// constraints parameter class for selecting boundary condition type
+class BCTypeParamGlobalDirichlet
+{
+public :
+  typedef Dune::PDELab::StokesBoundaryCondition BC;
+
+  struct Traits
+  {
+    typedef BC::Type RangeType;
+  };
+
+  BCTypeParamGlobalDirichlet() {}
+
+  template<typename I>
+  inline void evaluate (const I & intersection,   /*@\label{bcp:name}@*/
+                        const Dune::FieldVector<typename I::ctype, I::dimension-1> & coord,
+                        BC::Type& y) const
+  {
+    y = BC::VelocityDirichlet;
+  }
+
+  template<typename T>
+  void setTime(T t){
+  }
+};
+
+
+template<typename GV, typename RF, int dim>
+class TaylorGreenVelocity :
+  public Dune::PDELab::AnalyticGridFunctionBase<
+  Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim>,
+  TaylorGreenVelocity<GV,RF,dim> >
+{
+public:
+  typedef Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim> Traits;
+  typedef Dune::PDELab::AnalyticGridFunctionBase<Traits,TaylorGreenVelocity<GV,RF,dim> > BaseT;
+
+  typedef typename Traits::DomainType DomainType;
+  typedef typename Traits::RangeType RangeType;
+
+  TaylorGreenVelocity(const GV & gv, const Dune::ParameterTree& params) : BaseT(gv)
+  {
+    mu = params.get<RF>("mu");
+    rho = params.get<RF>("rho");
+    time = 0.0;
+  }
+
+  inline void evaluateGlobal(const DomainType & x, RangeType & y) const
+  {
+    // TODO Get mu and rho from somewhere else!
+    RF pi = 3.14159265358979323846;
+    RF nu = mu/rho;
+    y[0] = -exp(-2.0*pi*pi*nu*time)*cos(pi*x[0])*sin(pi*x[1]);
+    y[1] = exp(-2.0*pi*pi*nu*time)*sin(pi*x[0])*cos(pi*x[1]);
+  }
+
+  template <typename T>
+  void setTime(T t){
+    time = t;
+  }
+
+private:
+  RF rho;
+  RF mu;
+  RF time;
+};
+
+
+template<typename GV, typename RF>
+class TaylorGreenPressure
+  : public Dune::PDELab::AnalyticGridFunctionBase<
+  Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,1>,
+  TaylorGreenPressure<GV,RF> >
+{
+public:
+  typedef Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,1> Traits;
+  typedef Dune::PDELab::AnalyticGridFunctionBase<Traits,TaylorGreenPressure<GV,RF> > BaseT;
+
+  typedef typename Traits::DomainType DomainType;
+  typedef typename Traits::RangeType RangeType;
+
+  TaylorGreenPressure (const GV& gv, const Dune::ParameterTree& params) : BaseT(gv)
+  {
+    mu = params.get<RF>("mu");
+    rho = params.get<RF>("rho");
+    time = 0.0;
+  }
+
+  inline void evaluateGlobal (const typename Traits::DomainType& x,
+                              typename Traits::RangeType& y) const
+  {
+    RF pi = 3.14159265358979323846;
+    RF nu = mu/rho;
+    y = -0.25*rho*exp(-4.0*pi*pi*nu*time)*(cos(2.0*pi*x[0])+cos(2.0*pi*x[1]));
+  }
+
+  template<typename T>
+  void setTime(T t){
+    time = t;
+  }
+
+private:
+  RF rho;
+  RF mu;
+  RF time;
+};
+
+
+
+template<typename GV, typename RF, std::size_t dim_range>
+class ZeroVectorFunction :
+  public Dune::PDELab::AnalyticGridFunctionBase<
+  Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim_range>,
+  ZeroVectorFunction<GV,RF,dim_range> >,
+  public Dune::PDELab::InstationaryFunctionDefaults
+{
+public:
+  typedef Dune::PDELab::AnalyticGridFunctionTraits<GV,RF,dim_range> Traits;
+  typedef Dune::PDELab::AnalyticGridFunctionBase<Traits, ZeroVectorFunction> BaseT;
+
+  typedef typename Traits::DomainType DomainType;
+  typedef typename Traits::RangeType RangeType;
+
+  ZeroVectorFunction(const GV & gv) : BaseT(gv) {}
+
+  inline void evaluateGlobal(const DomainType & x, RangeType & y) const
+  {
+    y=0;
+  }
+};
+
+template<typename GV, typename RF>
+class ZeroScalarFunction
+  : public ZeroVectorFunction<GV,RF,1>
+{
+public:
+
+  ZeroScalarFunction(const GV & gv) : ZeroVectorFunction<GV,RF,1>(gv) {}
+
+};
+
+#endif
diff --git a/test/navier-stokes/reference_program/taylor-green.ini b/test/navier-stokes/reference_program/taylor-green.ini
new file mode 100644
index 0000000000000000000000000000000000000000..b7e94815a97507b65f0d85838c63882e349b860d
--- /dev/null
+++ b/test/navier-stokes/reference_program/taylor-green.ini
@@ -0,0 +1,14 @@
+[parameters]
+rho = 1.0
+mu = 0.01
+
+[parameters.dg]
+epsilon = -1
+sigma = 6.0
+beta = 1.0
+
+[driver]
+time_end = 1.0
+dt = 1e-3
+nth = 20
+cells_per_dir = 16
diff --git a/test/navier-stokes/reference_program/taylor_green_solution.py b/test/navier-stokes/reference_program/taylor_green_solution.py
new file mode 100644
index 0000000000000000000000000000000000000000..0def653962e95367a9b24ee5f5a80773cbb0340e
--- /dev/null
+++ b/test/navier-stokes/reference_program/taylor_green_solution.py
@@ -0,0 +1,99 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+def pressure(t, x, y):
+    rho = 1.0
+    mu = 1.0/100
+    nu = mu/rho
+    pi = np.pi
+    return -0.25*rho*np.exp(-4.0*pi**2*nu*t)*(np.cos(2.0*pi*x) + np.cos(2.0*pi*y))
+
+
+def velocity(t, x, y):
+    rho = 1.0
+    mu = 1.0/100
+    nu = mu/rho
+    pi = np.pi
+    v = np.empty(2)
+    v[0] = -np.exp(-2.0*pi*mu/rho*t)*np.cos(pi*x)*np.sin(pi*y)
+    v[1] = np.exp(-2.0*pi*mu/rho*t)*np.sin(pi*x)*cos(pi*y)
+    return v
+
+
+def v_0(t, x, y):
+    rho = 1.0
+    mu = 1.0/100
+    nu = mu/rho
+    pi = np.pi
+    return -np.exp(-2.0*pi*mu/rho*t)*np.cos(pi*x)*np.sin(pi*y)
+
+
+def v_1(t, x, y):
+    rho = 1.0
+    mu = 1.0/100
+    nu = mu/rho
+    pi = np.pi
+    return np.exp(-2.0*pi*mu/rho*t)*np.sin(pi*x)*cos(pi*y)
+
+
+def velocity_norm(t, x, y):
+    rho = 1.0
+    mu = 1.0/100
+    nu = mu/rho
+    pi = np.pi
+    return np.sqrt((-np.exp(-2.0*pi*mu/rho*t)*np.cos(pi*x)*np.sin(pi*y))**2 + (np.exp(-2.0*pi*mu/rho*t)*np.sin(pi*x)*np.cos(pi*y))**2)
+
+
+def plot_pressure(t, n):
+    h = 2.0/n
+    x = np.arange(-1,1,h)
+    y = np.arange(-1,1,h)
+    xx, yy = np.meshgrid(x, y, sparse=True)
+    z = pressure(t, xx, yy)
+    CS = plt.contourf(x,y,z)
+    cbar = plt.colorbar(CS)
+    plt.show()
+
+
+def minmax_pressure(t, n):
+    h = 2.0/n
+    x = np.arange(-1,1,h)
+    y = np.arange(-1,1,h)
+    xx, yy = np.meshgrid(x, y, sparse=True)
+    z = pressure(t, xx, yy)
+    return np.min(z), np.max(z)
+
+
+def plot_velocity(t, n):
+    h = 2.0/n
+    x = np.arange(-1,1,h)
+    y = np.arange(-1,1,h)
+    xx, yy = np.meshgrid(x, y, sparse=True)
+    z = velocity_norm(t, xx, yy)
+    CS = plt.contourf(x,y,z)
+    cbar = plt.colorbar(CS)
+    plt.show()
+
+
+def minmax_velocity_norm(t, n):
+    h = 2.0/n
+    x = np.arange(-1,1,h)
+    y = np.arange(-1,1,h)
+    xx, yy = np.meshgrid(x, y, sparse=True)
+    z = velocity_norm(t, xx, yy)
+    return np.min(z), np.max(z)
+
+
+print(minmax_velocity_norm(1.0, 64))
+
+# plot_pressure(1.0, 100)
+# plot_velocity(1.0, 100)
+
+# dt = 1.0e-4
+# n = 1000
+# t = 0.0
+
+# for i in range(20):
+#     minimum, maximum = minmax_pressure(t, n)
+#     print("t: {}, n: {}, minumum: {}, maximum: {}".format(t,n,minimum,maximum))
+#     t = t + dt