From 48b2991da2b5b15ca85700a70f3ad9556b4884bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Thu, 26 Jan 2017 10:04:39 +0100
Subject: [PATCH] Add documentation

---
 python/dune/perftool/sumfact/sumfact.py | 27 +++++++++++++++----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index 65f26ece..7e2594df 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -242,7 +242,9 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
         # Construct the expression representing "{r,jac}.accumulate(..)"
         accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
 
-
+        # We can directly accumulate the solution in the last step of
+        # the sumfactorization. Currently this is only implemented for
+        # FastDGGridOperator.
         direct_output = None
         if get_option('fastdg'):
             ft = get_global_context_value("form_type")
@@ -281,19 +283,23 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                )
 
         # In the case of FastDGGridOperator we can write directly into the resiudal/jacobi
-        #
-        # TODO: At the moment this only works if we do not vectorize
-        # (over gradients) because loopy tries to acces a vectorclass
-        # variable.
         if get_option('fastdg'):
-            ft = get_global_context_value("form_type")
-            if ft == 'residual' or ft == 'jacobian_apply':
-                if get_global_context_value("dry_run", False):
+            # TODO: We accumulate the result directly in the last step
+            # of the sumfactorization. This part of code is only here
+            # because the pymbolic sumfactorization node seems to
+            # vanish if there is no assignment afterwards. In the dry
+            # run we force the node to be present by doing an
+            # assignment here.  After the dry run everything works as
+            # expected.
+            #
+            # (Note: This code does the right thing if you do not use direct_output.)
+            if get_global_context_value("dry_run", False):
+                ft = get_global_context_value("form_type")
+                if ft == 'residual' or ft == 'jacobian_apply':
                     shape = (basis_functions_per_direction(),) * world_dimension()
                     ftags = ",".join(["f"]*len(shape))
                     globalarg(accum, dtype=np.float64, shape=shape, dim_tags=ftags)
                     assignee = prim.Subscript(prim.Variable(accum), tuple(prim.Variable(i) for i in inames))
-
                     expression = prim.Sum((assignee, result))
                     instruction(assignee=assignee,
                                 expression=expression,
@@ -301,8 +307,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                 forced_iname_deps_is_final=True,
                                 depends_on=insn_dep,
                     )
-            else:
-                if get_global_context_value("dry_run", False):
+                else:
                     assert ft == 'jacobian'
                     shape = (basis_functions_per_direction(),) * (world_dimension() * 2)
                     ftags = ",".join(["f"] * len(shape))
-- 
GitLab