From 6f0041b59306bcee04fb9d02b596f75443f1b861 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Wed, 7 Dec 2016 13:28:25 +0100 Subject: [PATCH] Last fixes to combined vectorization --- dune/perftool/sumfact/transposereg.hh | 3 ++- python/dune/perftool/loopy/transformations/collect_rotate.py | 4 ++-- python/dune/perftool/sumfact/sumfact.py | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dune/perftool/sumfact/transposereg.hh b/dune/perftool/sumfact/transposereg.hh index 7efecee7..81cdbbc8 100644 --- a/dune/perftool/sumfact/transposereg.hh +++ b/dune/perftool/sumfact/transposereg.hh @@ -1,11 +1,12 @@ #ifndef DUNE_PERFTOOL_SUMFACT_TRANSPOSEREG_HH #define DUNE_PERFTOOL_SUMFACT_TRANSPOSEREG_HH +#include<dune/perftool/vectorclass/vectorclass.h> + /** * Transpose a 4x4 matrix given by 4 vector registers (efficiently) */ - void transpose_reg(Vec4d& a0, Vec4d& a1, Vec4d& a2, Vec4d& a3) { Vec4d b0,b1,b2,b3; diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index f585c79e..409b4c2a 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -61,7 +61,8 @@ def collect_vector_data_rotate(knl): new_insns = [] all_writers = [] - rotating = False + tags = frozenset().union(*tuple(i.tags for i in insns)) + rotating = "gradvec" in tags # # Inspect the given instructions for dependent quantities @@ -156,7 +157,6 @@ def collect_vector_data_rotate(knl): # Add substitution rules for expr in quantities[quantity]: - rotating = True assert isinstance(expr, prim.Subscript) last_index = expr.index[-1] assert last_index in tuple(range(4)) diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py index d6f11c77..665fe313 100644 --- a/python/dune/perftool/sumfact/sumfact.py +++ b/python/dune/perftool/sumfact/sumfact.py @@ -153,8 +153,10 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): shape = shape + (4,) dim_tags = dim_tags + ",c" index = (index,) + vectag = frozenset({"gradvec"}) else: index = () + vectag = frozenset() temp = initialize_buffer(buffer, base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices), @@ -183,7 +185,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): expression=expression, forced_iname_deps=frozenset(quadrature_inames() + visitor.inames), forced_iname_deps_is_final=True, - tags=frozenset({"quadvec"}), + tags=frozenset({"quadvec"}).union(vectag), depends_on=frozenset({deps}) ) -- GitLab