Skip to content
Snippets Groups Projects
Commit 12a47939 authored by René Heß
Browse files

Merge branch 'feature/remove-trivial-reductions' into 'master'

Eliminate trivial reductions

For some reason, with this patch applied, the kernel for sumfact_poisson_dg_symdiff fails to schedule. Mystery bug!

See merge request !79
parents 7424fb14 31d4027f
No related branches found
No related tags found
No related merge requests found
......@@ -491,9 +491,15 @@ def extract_kernel_from_cache(tag, wrap_in_cgen=True):
preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("{} and preamble".format(tag)))]
kernel = kernel.copy(preambles=preambles)
# Remove inames that have become obsolete
kernel = lp.remove_unused_inames(kernel)
# Do the loopy preprocessing!
kernel = preprocess_kernel(kernel)
# *REALLY* ignore boostability. This is - so far - necessary due to a mystery bug.
kernel = kernel.copy(instructions=[i.copy(boostable=False, boostable_into=frozenset()) for i in kernel.instructions])
if wrap_in_cgen:
# Wrap the kernel in something which can generate code
from dune.perftool.pdelab.signatures import assembly_routine_signature
......
......@@ -247,9 +247,9 @@ def evaluate_reference_gradient(element, name, restriction):
i = 0
for d in range(local_dimension()):
if d == facedir:
i = i+1
i = i + 1
quadinamemapping[i] = quad_inames[d]
i = i+1
i = i + 1
for d in range(dim):
prod = []
......@@ -261,7 +261,7 @@ def evaluate_reference_gradient(element, name, restriction):
if facedir is not None:
facemod = get_facemod(restriction)
from dune.perftool.sumfact.amatrix import PolynomialLookup, name_polynomials
prod.append(prim.Call(PolynomialLookup(name_polynomials(), facedir==d),
prod.append(prim.Call(PolynomialLookup(name_polynomials(), facedir == d),
(prim.Variable(inames[facedir]), facemod)),)
assignee = prim.Subscript(prim.Variable(name), (d,))
......
......@@ -308,7 +308,6 @@ def sum_factorization_kernel(a_matrices, buf, stage,
# Get the inames needed for one matrix-matrix multiplication
i = sumfact_iname(out_shape[0], "row")
j = sumfact_iname(out_shape[1], "col")
k = sumfact_iname(a_matrix.cols, "red")
# Maybe introduce a vectorization iname for this matrix-matrix multiplication
vec_iname = ()
......@@ -317,15 +316,27 @@ def sum_factorization_kernel(a_matrices, buf, stage,
vec_iname = (prim.Variable(iname),)
transform(lp.tag_inames, [(iname, "vec")])
# Construct the matrix-matrix-multiplication expression a_ik*in_kj
prod = Product((Subscript(Variable(a_matrix.name), (Variable(i), Variable(k)) + vec_iname),
Subscript(Variable(inp), (Variable(k), Variable(j)) + vec_iname)
))
if a_matrix.cols == 1:
# A trivial reduction is implemented as a plain product; otherwise we run
# into a code generation corner case that produces overly complicated code.
# This could be fixed upstream, but the loopy code that realizes reductions
# is not trivial and the priority is low.
matprod = Product((Subscript(Variable(a_matrix.name), (Variable(i), 0) + vec_iname),
Subscript(Variable(inp), (0, Variable(j)) + vec_iname)
))
else:
k = sumfact_iname(a_matrix.cols, "red")
# Construct the matrix-matrix-multiplication expression a_ik*in_kj
prod = Product((Subscript(Variable(a_matrix.name), (Variable(i), Variable(k)) + vec_iname),
Subscript(Variable(inp), (Variable(k), Variable(j)) + vec_iname)
))
matprod = Reduction("sum", k, prod)
# Issue the reduction instruction that implements the multiplication
# at the same time store the instruction ID for the next instruction to depend on
insn_dep = frozenset({instruction(assignee=Subscript(Variable(out), (Variable(i), Variable(j)) + vec_iname),
expression=Reduction("sum", k, prod),
expression=matprod,
forced_iname_deps=frozenset({i, j}).union(additional_inames),
forced_iname_deps_is_final=True,
depends_on=insn_dep,
......
......@@ -38,7 +38,7 @@ def no_vectorization(sumfacts):
sumf.restriction,
sumf.a_matrices,
get_counted_variable("buffer"),
get_counted_variable(restricted_name("input", sumf.restriction)),
get_counted_variable("input"),
None)
......@@ -135,4 +135,4 @@ class HasSumfactMapper(lp.symbolic.CombineMapper):
def find_sumfact(expr):
return HasSumfactMapper()(expr)
return HasSumfactMapper()(expr)
\ No newline at end of file
loopy @ c16057b7
Subproject commit 36c9bb5c0a5905022fc850c3efc5ad7661e5f897
Subproject commit c16057b7c361584d04edb59132f0742ecaa38226
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment