From fafe98aff669158e5d175ed169c32586ea588135 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Wed, 21 Dec 2016 11:36:58 +0100 Subject: [PATCH] Detect faces in shape generation regardless of the size being 1 --- python/dune/perftool/sumfact/basis.py | 3 ++- python/dune/perftool/sumfact/sumfact.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index 46a639fb..86a9b354 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -108,6 +108,7 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor): preferred_position=i, insn_dep=insn_dep, restriction=restriction, + outshape=tuple(mat.rows for mat in a_matrices if mat.face is None), direct_input=direct_input, ) buffers.append(var) @@ -174,7 +175,7 @@ def pymbolic_trialfunction(element, restriction, component, visitor): 1, preferred_position=None, insn_dep=frozenset({Writes(inp)}), - outshape=tuple(mat.rows for mat in a_matrices if mat.rows != 1), + outshape=tuple(mat.rows for mat in a_matrices if mat.face is None), restriction=restriction, direct_input=direct_input, ) diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py index e135fb75..2429da4e 100644 --- a/python/dune/perftool/sumfact/sumfact.py +++ b/python/dune/perftool/sumfact/sumfact.py @@ -144,7 +144,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): a_matrices, buf, inp, index, padding = get_vectorization_info(a_matrices, (accterm.argument.restriction, restriction)) # Initialize a base storage for this buffer and get a temporay pointing to it - shape = tuple(mat.cols for mat in a_matrices if mat.cols != 1) + shape = tuple(mat.cols for mat in a_matrices if mat.face is None) dim_tags = ",".join(['f'] * local_dimension()) if index is not None: shape = shape + (4,) @@ -434,7 +434,9 @@ def sum_factorization_kernel(a_matrices, }) if outshape is None: - outshape = tuple(mat.rows for mat in a_matrices if mat.rows != 1) + assert stage == 3 + outshape = tuple(mat.rows for mat in a_matrices) + dim_tags = ",".join(['f'] * len(outshape)) if next(iter(a_matrices)).vectorized: -- GitLab