diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 4b704c449a8ea31249faa70011c2fd8db1558746..d4aea877bdfd46157f3447baaf02869a2dfe6131 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -205,7 +205,8 @@ def _realize_sum_factorization_kernel(sf): # In case of direct output we directly accumulate the result # of the Sumfactorization into some global data structure. if l == len(matrix_sequence) - 1 and get_form_option('fastdg') and sf.stage == 3: - insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name})) + if sf.vectorized: + insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name})) insn_dep = sf.output.realize_direct(matprod, output_inames, out_shape, insn_args) else: # Issue the reduction instruction that implements the multiplication diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 4ee4960ec15ba6ece4287ae3c1cb26e2f7a8eb5e..44cf0e0bf530867a167127e3a00bef1cf5a3bc22 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -10,7 +10,7 @@ from dune.perftool.sumfact.quadrature import quadrature_inames from dune.perftool.sumfact.tabulation import BasisTabulationMatrixBase, BasisTabulationMatrixArray from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.loopy.vcl import ExplicitVCLCast, VCLLowerUpperLoad -from dune.perftool.tools import get_leaf +from dune.perftool.tools import get_leaf, maybe_wrap_subscript from pytools import ImmutableRecord, product