From 144d1c8143a085e40ab84763abce12796c2c202f Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Fri, 28 Oct 2016 13:41:49 +0200 Subject: [PATCH] Include stage 3, with a lot of dummies etc. --- python/dune/perftool/pdelab/localoperator.py | 7 ++- python/dune/perftool/sumfact/sumfact.py | 50 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 48998b88..fca6efac 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -2,12 +2,14 @@ from __future__ import absolute_import from os.path import splitext from dune.perftool.options import get_option -from dune.perftool.generation import (base_class, +from dune.perftool.generation import (backend, + base_class, class_basename, class_member, constructor_parameter, domain, dump_accumulate_timer, + get_backend, global_context, iname, include_file, @@ -361,6 +363,7 @@ def grad_iname(index, dim): return name +@backend(interface="accum_insn") def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): # First we do the tree traversal to get a pymbolic expression representing this expression pymbolic_expr = visitor(accterm.term) @@ -467,7 +470,7 @@ def generate_kernel(integrals): interface = PDELabInterface() from dune.perftool.ufl.visitor import UFL2LoopyVisitor visitor = UFL2LoopyVisitor(interface, measure, indexmap) - generate_accumulation_instruction(visitor, term, measure, subdomain_id) + get_backend(interface="accum_insn")(visitor, term, measure, subdomain_id) # Extract the information, which is needed to create a loopy kernel. # First extracting it, might be useful to alter it before kernel generation. diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py index 37be6d7d..a9c7271a 100644 --- a/python/dune/perftool/sumfact/sumfact.py +++ b/python/dune/perftool/sumfact/sumfact.py @@ -4,9 +4,12 @@ from dune.perftool.pdelab.argument import (name_coefficientcontainer, from dune.perftool.generation import (backend, domain, get_counter, + get_global_context_value, + globalarg, iname, instruction, silenced_warning, + temporary_variable, ) from dune.perftool.loopy.buffer import (get_buffer_temporary, initialize_buffer, @@ -82,6 +85,53 @@ def pymbolic_trialfunction(element, restriction, component): return Subscript(Variable(var), tuple(Variable(i) for i in quadrature_inames())) +@backend(interface="accum_insn", name="sumfact") +def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): + pymbolic_expr = visitor(accterm.term) + + if pymbolic_expr == 0: + return + + # Get geometric dimension + formdata = get_global_context_value('formdata') + dim = formdata.geometric_dimension + + # Get a matrix that holds the result of the pymbolic_expr at all quadrature points + name = "blubb" + temporary_variable(name, shape=(quadrature_points_per_direction(),) * dim, managed=True) + instruction(assignee=Subscript(Variable(name), tuple(Variable(i) for i in quadrature_inames())), + expression=pymbolic_expr, + forced_iname_deps=frozenset(quadrature_inames()), + forced_iname_deps_is_final=True, + depends_on=frozenset({stage_insn(1)}), + ) + + theta_transposed = name_theta_transposed() + rows = basis_functions_per_direction() + cols = quadrature_points_per_direction() + a_matrix = AMatrix(theta_transposed, rows, cols) + a_matrices = (a_matrix, a_matrix) + + # Do stage 1 + initialize_buffer("reffub", + base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices), + num=2 + ) + + result = sum_factorization_kernel(a_matrices, "reffub", 2) + + from dune.perftool.pdelab.spaces import LFSLocalIndex + # Now write all this into the correct residual + inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices) + globalarg("r", shape=(basis_functions_per_direction() ^ dim,)) + instruction(expression=Subscript(Variable(result), tuple(Variable(i) for i in inames)), +# assignee=Subscript(Variable("r"), (Call(LFSLocalIndex("lfs"), tuple(Variable(i) for i in inames)),)), + assignee=Subscript(Variable("r"), (0,)), + forced_iname_deps=frozenset(inames), + forced_iname_deps_is_final=True, + depends_on=frozenset({stage_insn(3)}), + ) + # # Do stage 3 (for f=u => mass matrix) # theta_transposed = name_theta_transposed() # a_matrix_transposed = AMatrix(theta_transposed, cols, rows) -- GitLab