From dc799b48dfaafe59338771eda5b552d3f83a8672 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Fri, 16 Dec 2016 15:56:35 +0100
Subject: [PATCH] Accumulation possible for fastdg

---
 python/dune/perftool/sumfact/basis.py   | 12 ++++++------
 python/dune/perftool/sumfact/sumfact.py | 20 +++++---------------
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index 8401ad18..7e0777dd 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -139,29 +139,29 @@ def pymbolic_trialfunction(element, restriction, component, visitor):
 
     # Get the vectorization info. If this happens during the dry run, we get dummies
     from dune.perftool.sumfact.vectorization import get_vectorization_info
-    a_matrices, buffer, input, index, padding = get_vectorization_info(a_matrices, restriction)
+    a_matrices, buf, inp, index, padding = get_vectorization_info(a_matrices, restriction)
 
     # Flip flop buffers for sumfactorization
     shape = (product(mat.cols for mat in a_matrices),)
     if index is not None:
         shape = shape + (4,)
-    initialize_buffer(buffer,
+    initialize_buffer(buf,
                       base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
                       num=2
                       ).get_temporary(shape=shape,
-                                      name=input,
+                                      name=inp,
                                       )
 
     # Setup the input!
-    setup_theta(input, element, restriction, component, index)
+    setup_theta(inp, element, restriction, component, index)
 
     # Add a sum factorization kernel that implements the evaluation of
     # the basis functions at quadrature points (stage 1)
     var, _ = sum_factorization_kernel(a_matrices,
-                                      buffer,
+                                      buf,
                                       1,
                                       preferred_position=None,
-                                      insn_dep=frozenset({Writes(input)}),
+                                      insn_dep=frozenset({Writes(inp)}),
                                       outshape=tuple(mat.rows for mat in a_matrices if mat.rows != 1),
                                       restriction=restriction,
                                       )
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index d363fc0c..5b340018 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -244,9 +244,11 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                (maybe_wrap_subscript(result, prim.Variable(iname)),),
                                )
 
+        # In the case of FastDGGridOperator we can write directly into the residual/jacobian
         if get_option('fastdg'):
             ft = get_global_context_value("form_type")
             if ft=='residual':
+                accum = accum + ".data()"
                 size = basis_functions_per_direction() ** world_dimension()
                 globalarg(accum, dtype=np.float64, shape=(size,), managed=False)
                 assignee = prim.Subscript(prim.Variable(accum), (test_lfs.index,))
@@ -259,9 +261,9 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                             )
             else:
                 assert ft=='jacobian'
-                # palpo TODO: think about it
+                accum = accum + ".data()"
                 size = basis_functions_per_direction() ** world_dimension()
-                globalarg(accum, dtype=np.float64, shape=(size, size), managed=False)
+                globalarg(accum, dtype=np.float64, shape=(size, size), managed=True)
                 assignee = prim.Subscript(prim.Variable(accum), (ansatz_lfs.index, test_lfs.index))
                 expression = prim.Sum((assignee,result))
                 instruction(assignee=assignee,
@@ -270,19 +272,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                             forced_iname_deps_is_final=True,
                             depends_on=insn_dep,
                             )
-                # expr = Call(PDELabAccumulationFunction(accum, rank),
-                #             (ansatz_lfs.get_args() +
-                #              test_lfs.get_args() +
-                #              (result,)
-                #              )
-                #             )
-                # instruction(assignees=(),
-                #             expression=expr,
-                #             forced_iname_deps=frozenset(inames + visitor.inames + vecinames),
-                #             forced_iname_deps_is_final=True,
-                #             depends_on=insn_dep,
-                #             )
-
+        # Default: Generate accumulation instructions
         else:
             expr = Call(PDELabAccumulationFunction(accum, rank),
                         (ansatz_lfs.get_args() +
-- 
GitLab