# Patch summary. This preamble sits before the first "diff --git" header; the
# patch text below it is reproduced verbatim.
#
# python/dune/perftool/options.py
#   Adds the option `vectorize_allow_quadrature_changes` (default False) to
#   PerftoolOptionsArray. Per its help string it controls whether the
#   vectorization strategy is allowed to alter quadrature point numbers.
#
# python/dune/perftool/sumfact/vectorization.py
#   Imports quadrature_points_per_direction and
#   set_quadrature_points_per_direction from dune.perftool.sumfact.tabulation,
#   and round_to_multiple from dune.perftool.tools.
#   In vertical_vectorization_strategy a new branch is inserted between the
#   "quadrature_size % depth == 0" case and the PerftoolError case: when the
#   new option is enabled and mat.quadrature_size != 1, entry i of the
#   quadrature-points-per-direction tuple (i being the index of the matrix in
#   sf.matrix_sequence) is rounded up with round_to_multiple(..., depth), the
#   modified tuple is stored via set_quadrature_points_per_direction, and i is
#   returned. With the option disabled the previous behaviour (raising
#   PerftoolError) is unchanged.
#
# python/dune/perftool/tools.py
#   Adds round_to_multiple(x, n), defined as n * ceildiv(x, n); with the
#   existing ceildiv(a, b) = -(-a // b) this is x rounded up to a multiple of
#   n for positive n.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
# onto two physical lines; restore them from the original commit before
# trying to apply it.
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index ffd4551d13211626afe379a606ef894efb384743..05ebe70efba513182fc0616b4121f278dfe1aa3d 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -59,6 +59,7 @@ class PerftoolOptionsArray(ImmutableRecord): vectorize_slice = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") vectorize_diagonal = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") vectorize_greedy = PerftoolOption(default=False, helpstr="the heuristic currently in use (to produce paper numbers)") + vectorize_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers") turn_off_diagonal_jacobian = PerftoolOption(default=False, helpstr="Do not use diagonal_jacobian transformation on the ufl tree and cast result of jacobianInverseTransposed into a FieldMatrix.") architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. 
Possible values: haswell|knl") grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.") diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 303092f120d0df2b9f80575de739c7eef64c547b..af92c6967388cca9679829d7805803011def4108 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -11,9 +11,13 @@ from dune.perftool.generation import (generator_factory, from dune.perftool.pdelab.restriction import (Restriction, restricted_name, ) -from dune.perftool.sumfact.tabulation import BasisTabulationMatrixArray +from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray, + quadrature_points_per_direction, + set_quadrature_points_per_direction, + ) from dune.perftool.error import PerftoolError from dune.perftool.options import get_option +from dune.perftool.tools import round_to_multiple import loopy as lp import numpy as np @@ -67,6 +71,11 @@ def vertical_vectorization_strategy(sumfact, depth): for i, mat in enumerate(sf.matrix_sequence): if mat.quadrature_size % depth == 0: return i + elif get_option("vectorize_allow_quadrature_changes") and mat.quadrature_size != 1: + quad = list(quadrature_points_per_direction()) + quad[i] = round_to_multiple(quad[i], depth) + set_quadrature_points_per_direction(tuple(quad)) + return i elif mat.quadrature_size != 1: raise PerftoolError("Vertical vectorization is not possible!") diff --git a/python/dune/perftool/tools.py b/python/dune/perftool/tools.py index a0dfaf29af06abcdc3f3e49d71291e663536e130..260fc9dfe6151492405e8b6996afa1623e9f3896 100644 --- a/python/dune/perftool/tools.py +++ b/python/dune/perftool/tools.py @@ -63,3 +63,7 @@ def get_pymbolic_tag(expr): def ceildiv(a, b): return -(-a // b) + + +def round_to_multiple(x, n): + return n * ceildiv(x, n)