From 323dcb6c950c0c2cffe0fd9a21a59bd4fca4cae3 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Mon, 24 Apr 2017 15:24:24 +0200 Subject: [PATCH] Unroll dimension loops in sumfact branch --- python/dune/perftool/options.py | 5 +++++ python/dune/perftool/sumfact/accumulation.py | 12 +++++++++--- python/dune/perftool/ufl/modified_terminals.py | 16 ++++++++++++---- python/dune/perftool/ufl/preprocess.py | 2 ++ python/dune/perftool/ufl/visitor.py | 12 ++++++++---- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index 650626a5..7d7712d9 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -64,6 +64,7 @@ class PerftoolOptionsArray(ImmutableRecord): # Arguments that are mainly to be set by logic depending on other options max_vector_width = PerftoolOption(default=256, helpstr=None) + unroll_dimension_loops = PerftoolOption(default=False, helpstr="whether loops over the gemetric dimension should be unrolled.") # Until more sophisticated logic is needed, we keep the actual option data in this module @@ -111,6 +112,10 @@ def update_options_from_inifile(opt): def process_options(opt): """ Make sure that the options have been fully processed """ opt = expand_architecture_options(opt) + + if opt.sumfact: + opt = opt.copy(unroll_dimension_loops=True) + return opt diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index f01d2bd3..ca1b52f5 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -40,6 +40,7 @@ from pytools import ImmutableRecord import loopy as lp import numpy as np import pymbolic.primitives as prim +import ufl.classes as uc @iname @@ -87,8 +88,6 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): mod_arg_expr = accterm.argument.expr from ufl.classes import FunctionView, Argument while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)): - # If this has more than one operand we are potentially doing dangerous stuff - assert len(mod_arg_expr.ufl_operands) == 1 mod_arg_expr = mod_arg_expr.ufl_operands[0] degree = mod_arg_expr.ufl_element()._degree basis_size = degree + 1 @@ -111,10 +110,17 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): if accterm.is_jacobian: jacobian_inames = visitor.inames + # Determine the derivative direction + derivative = None + if accterm.new_indices: + derivative = indices[-1] + if isinstance(accterm.argument.expr, uc.Indexed): + derivative = accterm.argument.expr.ufl_operands[1][0]._value + # Construct the matrix sequence for this sum factorization matrix_sequence = construct_basis_matrix_sequence( transpose=True, - derivative=indices[-1] if accterm.new_indices else None, + derivative=derivative, facedir=get_facedir(accterm.argument.restriction), facemod=get_facemod(accterm.argument.restriction), basis_size=basis_size) diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py index 50dd3315..3339eec7 100644 --- a/python/dune/perftool/ufl/modified_terminals.py +++ b/python/dune/perftool/ufl/modified_terminals.py @@ -4,6 +4,8 @@ from ufl.algorithms import MultiFunction from ufl.classes import MultiIndex from pytools import Record +import ufl.classes as uc + class Restriction: NONE = 0 @@ -92,8 +94,9 @@ class ModifiedTerminalTracker(MultiFunction): class ModifiedArgumentAnalysis(ModifiedTerminalTracker): - def __init__(self, do_index=False): + def __init__(self, do_index=False, do_fixed_index=False): self.do_index = do_index + self.do_fixed_index = do_fixed_index self.index = None ModifiedTerminalTracker.__init__(self) @@ -102,7 +105,9 @@ class ModifiedArgumentAnalysis(ModifiedTerminalTracker): return self.call(o) def indexed(self, o): - if self.do_index: + if self.do_fixed_index and all(isinstance(i, uc.FixedIndex) for i in o.ufl_operands[1]): + self.index = o.ufl_operands[1] + if self.do_index and all(isinstance(i, uc.Index) for i in o.ufl_operands[1]): self.index = o.ufl_operands[1] return self.call(o.ufl_operands[0]) @@ -125,9 +130,10 @@ def analyse_modified_argument(expr, **kwargs): class _ModifiedArgumentExtractor(MultiFunction): """ A multifunction that extracts and returns the set of modified arguments """ - def __call__(self, o, argnumber=None, coeffcount=None, do_index=False, do_gradient=True): + def __call__(self, o, argnumber=None, coeffcount=None, do_index=False, do_fixed_index=True, do_gradient=True): self.argnumber = argnumber self.coeffcount = coeffcount + self.do_fixed_index = do_fixed_index self.do_index = do_index self.do_gradient = do_gradient self.modified_arguments = set() @@ -151,7 +157,9 @@ class _ModifiedArgumentExtractor(MultiFunction): return o def indexed(self, o): - if self.do_index: + if self.do_fixed_index and all(isinstance(i, uc.FixedIndex) for i in o.ufl_operands[1]): + return self.pass_on(o) + if self.do_index and all(isinstance(i, uc.Index) for i in o.ufl_operands[1]): return self.pass_on(o) else: self.expr(o) diff --git a/python/dune/perftool/ufl/preprocess.py b/python/dune/perftool/ufl/preprocess.py index 913bf5da..0a99d219 100644 --- a/python/dune/perftool/ufl/preprocess.py +++ b/python/dune/perftool/ufl/preprocess.py @@ -27,7 +27,9 @@ def apply_default_transformations(form): from dune.perftool.ufl.transformations import transform_form from dune.perftool.ufl.transformations.indexpushdown import pushdown_indexed from dune.perftool.ufl.transformations.reindexing import reindexing + from dune.perftool.ufl.transformations.unroll import unroll_dimension_loops + form = transform_form(form, unroll_dimension_loops) form = transform_form(form, pushdown_indexed) form = transform_form(form, reindexing) diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py index 38b7eee6..280c1d88 100644 --- a/python/dune/perftool/ufl/visitor.py +++ b/python/dune/perftool/ufl/visitor.py @@ -381,13 +381,17 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): if self.measure == 'exterior_facet': restriction = Restriction.NEGATIVE - # Dune only hast JacobianInverseTransposed as a first class citizen, - # so we need to switch the indices around! assert(len(self.indices) == 2) - indices = (self.indices[1], self.indices[0]) + i, j = self.indices self.indices = None - return Subscript(Variable(self.interface.name_jacobian_inverse_transposed(restriction)), indices) + # Implement diagonal jacobians for unrolled matrices! + if isinstance(i, int) and isinstance(j, int) and i != j: + return 0 + + # Dune only has JacobianInverseTransposed as a first class citizen, + # so we need to switch the indices around! + return Subscript(Variable(self.interface.name_jacobian_inverse_transposed(restriction)), (j, i)) def jacobian(self, o): raise NotImplementedError("How did you get Jacobian into your form? We only support JacobianInverse right now. Report!") -- GitLab