Skip to content
Snippets Groups Projects
Commit 7424fb14 authored by René Heß's avatar René Heß
Browse files

Merge branch 'feature/vectorized-facet-integrals' into 'master'

Feature/vectorized facet integrals

See merge request !80
parents dc1ee2c9 06941da4
No related branches found
No related tags found
No related merge requests found
......@@ -22,6 +22,9 @@ class SumfactKernel(prim.Variable):
preferred_position,
restriction,
):
if not isinstance(restriction, tuple):
restriction = (restriction, 0)
self.a_matrices = a_matrices
self.buffer = buffer
self.stage = stage
......
......@@ -159,12 +159,12 @@ def collect_vector_data_rotate(knl):
for expr in quantities[quantity]:
assert isinstance(expr, prim.Subscript)
last_index = expr.index[-1]
assert last_index in tuple(range(4))
replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
(prim.Variable("vec_index") + last_index, prim.Variable(new_iname)),
)
else:
# Add a vector view to this quantity
expr, = quantities[quantity]
knl = add_vector_view(knl, quantity)
replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
(prim.Variable("vec_index"), prim.Variable(new_iname)),
......@@ -289,16 +289,16 @@ def collect_vector_data_rotate(knl):
)
)
# Rotate back!
if rotating:
new_insns.append(lp.CallInstruction((), # assignees
prim.Call(prim.Variable("transpose_reg"),
tuple(prim.Subscript(prim.Variable(lhsname), (prim.Variable("vec_index") + i, prim.Variable(new_iname))) for i in range(4))),
depends_on=frozenset({Tagged("vec_write")}),
within_inames=common_inames.union(inames).union(frozenset({new_iname})),
within_inames_is_final=True,
id="{}_rotateback".format(lhsname),
))
# Rotate back!
if rotating and "{}_rotateback".format(lhsname) not in [i.id for i in new_insns]:
new_insns.append(lp.CallInstruction((), # assignees
prim.Call(prim.Variable("transpose_reg"),
tuple(prim.Subscript(prim.Variable(lhsname), (prim.Variable("vec_index") + i, prim.Variable(new_iname))) for i in range(4))),
depends_on=frozenset({Tagged("vec_write")}),
within_inames=common_inames.union(inames).union(frozenset({new_iname})),
within_inames_is_final=True,
id="{}_rotateback".format(lhsname),
))
from loopy.kernel.creation import resolve_dependencies
return resolve_dependencies(knl.copy(instructions=new_insns + other_insns))
......@@ -12,7 +12,9 @@ from dune.perftool.generation import (backend,
temporary_variable,
valuearg,
)
from dune.perftool.options import option_switch
from dune.perftool.options import (get_option,
option_switch,
)
from dune.perftool.pdelab.quadrature import quadrature_preamble
from dune.perftool.tools import get_pymbolic_basename
from ufl.algorithms import MultiFunction
......@@ -260,8 +262,12 @@ def declare_normal(name, shape, shape_impl):
def name_unit_outer_normal():
name = "outer_normal"
temporary_variable(name, shape=(world_dimension(),), decl_method=declare_normal)
evaluate_unit_outer_normal(name)
if not get_option("diagonal_transformation_matrix"):
temporary_variable(name, shape=(world_dimension(),), decl_method=declare_normal)
evaluate_unit_outer_normal(name)
else:
declare_normal(name, None, None)
globalarg(name, shape=(world_dimension(),), dtype=np.float64)
return "outer_normal"
......
......@@ -24,10 +24,14 @@ class SumFactInterface(PDELabInterface):
return pymbolic_reference_gradient(element, restriction, number)
def pymbolic_trialfunction_gradient(self, element, restriction, component, visitor=None):
return pymbolic_trialfunction_gradient(element, restriction, component, visitor)
ret, indices = pymbolic_trialfunction_gradient(element, restriction, component, visitor)
visitor.indices = indices
return ret
def pymbolic_trialfunction(self, element, restriction, component, visitor=None):
return pymbolic_trialfunction(element, restriction, component, visitor)
ret, indices = pymbolic_trialfunction(element, restriction, component, visitor)
visitor.indices = indices
return ret
def quadrature_inames(self):
return quadrature_inames()
......
......@@ -59,22 +59,24 @@ class AMatrix(ImmutableRecord):
class LargeAMatrix(ImmutableRecord):
def __init__(self, rows, cols, transpose, derivative):
def __init__(self, rows, cols, transpose, derivative, face):
assert isinstance(derivative, tuple)
ImmutableRecord.__init__(self,
rows=rows,
cols=cols,
transpose=transpose,
derivative=derivative,
face=face,
)
@property
def name(self):
name = "ThetaLarge{}_{}".format("T" if self.transpose else "",
"_".join(tuple("d" if d else "" for d in self.derivative))
)
name = "ThetaLarge{}{}_{}".format("face{}_".format(self.face) if self.face is not None else "",
"T" if self.transpose else "",
"_".join(tuple("d" if d else "" for d in self.derivative))
)
for i, d in enumerate(self.derivative):
define_theta(name, (self.rows, self.cols), self.transpose, d, additional_indices=(i,))
define_theta(name, (self.rows, self.cols), self.transpose, d, face=self.face, additional_indices=(i,))
return loopy_class_member(name,
classtag="operator",
......
......@@ -75,7 +75,7 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor):
# Get the vectorization info. If this happens during the dry run, we get dummies
from dune.perftool.sumfact.vectorization import get_vectorization_info
a_matrices, buffer, input, index = get_vectorization_info(a_matrices, 0)
a_matrices, buffer, input, index = get_vectorization_info(a_matrices, restriction)
# Initialize the buffer for the sum fact kernel
shape = (product(mat.cols for mat in a_matrices),)
......@@ -101,6 +101,7 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor):
1,
preferred_position=i,
insn_dep=insn_dep,
restriction=restriction,
)
buffers.append(var)
......@@ -109,9 +110,7 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor):
# with the position in the vector register.
if index:
assert len(visitor.indices) == 1
indices = visitor.indices
visitor.indices = None
return maybe_wrap_subscript(var, tuple(prim.Variable(i) for i in quadrature_inames()) + indices)
return maybe_wrap_subscript(var, tuple(prim.Variable(i) for i in quadrature_inames()) + visitor.indices), None
# TODO this should be quite conditional!!!
for i, buf in enumerate(buffers):
......@@ -126,7 +125,7 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor):
forced_iname_deps_is_final=True,
)
return prim.Variable(name)
return prim.Variable(name), visitor.indices
@kernel_cached
......@@ -140,7 +139,7 @@ def pymbolic_trialfunction(element, restriction, component, visitor):
# Get the vectorization info. If this happens during the dry run, we get dummies
from dune.perftool.sumfact.vectorization import get_vectorization_info
a_matrices, buffer, input, index = get_vectorization_info(a_matrices, 0)
a_matrices, buffer, input, index = get_vectorization_info(a_matrices, restriction)
# Flip flop buffers for sumfactorization
shape = (product(mat.cols for mat in a_matrices),)
......@@ -164,6 +163,7 @@ def pymbolic_trialfunction(element, restriction, component, visitor):
preferred_position=None,
insn_dep=frozenset({Writes(input)}),
outshape=tuple(mat.rows for mat in a_matrices if mat.rows != 1),
restriction=restriction,
)
if index:
......@@ -172,8 +172,8 @@ def pymbolic_trialfunction(element, restriction, component, visitor):
index = ()
return prim.Subscript(var,
tuple(prim.Variable(i) for i in quadrature_inames() + index)
)
tuple(prim.Variable(i) for i in quadrature_inames()) + index
), visitor.indices
@iname
......
......@@ -128,7 +128,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
# Get the vectorization info. If this happens during the dry run, we get dummies
from dune.perftool.sumfact.vectorization import get_vectorization_info
a_matrices, buffer, input, index = get_vectorization_info(a_matrices, restriction)
a_matrices, buffer, input, index = get_vectorization_info(a_matrices, (accterm.argument.restriction, restriction))
# Initialize a base storage for this buffer and get a temporay pointing to it
shape = tuple(mat.cols for mat in a_matrices if mat.cols != 1)
......@@ -185,6 +185,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
insn_dep=insn_dep,
additional_inames=frozenset(visitor.inames),
preferred_position=pref_pos,
restriction=(accterm.argument.restriction, restriction),
)
inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices)
......@@ -240,7 +241,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
insn_dep = emit_sumfact_kernel(None, restriction, insn_dep)
@generator_factory(item_tags=("sumfactkernel",), context_tags=("kernel",), cache_key_generator=lambda a, b, s, **kw: (a, b, s))
@generator_factory(item_tags=("sumfactkernel",), context_tags=("kernel",), cache_key_generator=lambda a, b, s, **kw: (a, b, s, kw.get("restriction", 0)))
def sum_factorization_kernel(a_matrices, buf, stage,
insn_dep=frozenset({}),
additional_inames=frozenset({}),
......
......@@ -18,29 +18,32 @@ def vectorization_info(a_matrices, restriction, new_a_matrices, buffer, input, i
def get_vectorization_info(a_matrices, restriction):
if not isinstance(restriction, tuple):
restriction = (restriction, 0)
from dune.perftool.generation import get_global_context_value
if get_global_context_value("dry_run"):
# Return dummy data
return (a_matrices, get_counted_variable("buffer"), get_counted_variable("input"), None)
try:
return vectorization_info(a_matrices, restriction, None, None, None, None)
except TypeError:
# Try getting the vectorization info collected after dry run
vec = vectorization_info(a_matrices, restriction, None, None, None, None)
if vec[0] is None:
raise PerftoolError("Sumfact Vectorization data should have been collected in dry run, but wasnt")
return vec
def no_vectorization(sumfacts):
for sumf in sumfacts:
for res in (Restriction.NONE, Restriction.POSITIVE, Restriction.NEGATIVE):
vectorization_info(sumf.a_matrices,
res,
sumf.a_matrices,
get_counted_variable("buffer"),
get_counted_variable(restricted_name("input", sumf.restriction)),
None)
vectorization_info(sumf.a_matrices,
sumf.restriction,
sumf.a_matrices,
get_counted_variable("buffer"),
get_counted_variable(restricted_name("input", sumf.restriction)),
None)
def decide_stage_vectorization_strategy(sumfacts, stage):
stage_sumfacts = frozenset([sf for sf in sumfacts if sf.stage == stage])
def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
stage_sumfacts = frozenset([sf for sf in sumfacts if sf.stage == stage and sf.restriction == restriction])
if len(stage_sumfacts) in (3, 4):
# Map the sum factorization to their position in the joint kernel
position_mapping = {}
......@@ -79,6 +82,7 @@ def decide_stage_vectorization_strategy(sumfacts, stage):
cols=next(iter(stage_sumfacts)).a_matrices[i].cols,
transpose=next(iter(stage_sumfacts)).a_matrices[i].transpose,
derivative=tuple(derivative),
face=next(iter(stage_sumfacts)).a_matrices[i].face,
)
large_a_matrices.append(large)
......@@ -106,8 +110,11 @@ def decide_vectorization_strategy():
if not get_option("vectorize_grads"):
no_vectorization(sumfacts)
else:
decide_stage_vectorization_strategy(sumfacts, 1)
decide_stage_vectorization_strategy(sumfacts, 3)
for stage in (1, 3):
res = (Restriction.NONE, Restriction.POSITIVE, Restriction.NEGATIVE)
import itertools as it
for restriction in it.product(res, res):
decide_stage_vectorization_strategy(sumfacts, stage, restriction)
class HasSumfactMapper(lp.symbolic.CombineMapper):
......
__name = sumfact_poisson_dg_{__exec_suffix}
__exec_suffix = numdiff, symdiff | expand num
__exec_suffix = {diff_suffix}_{quadvec_suffix}_{gradvec_suffix}
diff_suffix = numdiff, symdiff | expand num
quadvec_suffix = quadvec, nonquadvec | expand quad
gradvec_suffix = gradvec, nongradvec | expand grad
cells = 16 16
extension = 1. 1.
......@@ -13,3 +17,5 @@ numerical_jacobian = 1, 0 | expand num
sumfact = 1
exact_solution_expression = g
compare_l2errorsquared = 1e-4
vectorize_quad = 1, 0 | expand quad
vectorize_grads = 1, 0 | expand grad
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment