Skip to content
Snippets Groups Projects
Commit f7909dba authored by René Heß's avatar René Heß
Browse files

Merge branch 'feature/mix-vectorizing-stage-1-and-3' into 'master'

Refactor vector indices in collect_rotate

See merge request !110
parents d4ff20be fad8f909
No related branches found
No related tags found
No related merge requests found
......@@ -30,10 +30,21 @@ def rotate_function_mangler(knl, func, arg_dtypes):
# This is not 100% within the loopy philosophy, as we are
# passing the vector registers as references and have them
# changed. Loopy assumes this function to be read-only.
include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile")
vcl = lp.types.NumpyType(get_vcl_type(np.float64, register_size=256))
return lp.CallMangleInfo("transpose_reg", (), (vcl, vcl, vcl, vcl))
class VectorIndices(object):
    """Hand out vector index variables and remember which ones were used.

    Every index is identified by its increment. Each request is recorded in
    ``needed`` as a ``(name, increment)`` pair, so that code consuming this
    object can iterate over ``needed`` afterwards and set up whatever is
    required for exactly those variables.
    """

    def __init__(self):
        # Set of (variable name, increment) pairs requested through get().
        # Being a set, repeated requests for one increment are stored once.
        self.needed = set()

    def get(self, increment):
        """Return the index variable associated with *increment*.

        The variable is named ``vec_index_inc<increment>``. As a side
        effect, the ``(name, increment)`` pair is registered in ``needed``.

        :arg increment: value identifying the index; it is formatted into
            the variable name and stored alongside it.
        :returns: a ``prim.Variable`` carrying the generated name.
        """
        entry = ("vec_index_inc{}".format(increment), increment)
        self.needed.add(entry)
        return prim.Variable(entry[0])
def collect_vector_data_rotate(knl):
#
# Process/Assert/Standardize the input
......@@ -51,6 +62,7 @@ def collect_vector_data_rotate(knl):
# Determine the vector lane width
# TODO infer the numpy type here
vec_size = get_vcl_type_size(np.float64)
vector_indices = VectorIndices()
# Add an iname to the kernel which will be used for vectorization
new_iname = "quad_vec_{}".format("_".join(inames))
......@@ -61,8 +73,6 @@ def collect_vector_data_rotate(knl):
new_insns = []
all_writers = []
tags = frozenset().union(*tuple(i.tags for i in insns))
rotating = "gradvec" in tags
#
# Inspect the given instructions for dependent quantities
......@@ -168,10 +178,9 @@ def collect_vector_data_rotate(knl):
# 1. Rotating the input data
knl = add_vector_view(knl, quantity, flatview=True)
include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile")
new_insns.append(lp.CallInstruction((), # assignees
prim.Call(prim.Variable("transpose_reg"),
tuple(prim.Subscript(prim.Variable(get_vector_view_name(quantity)), (prim.Variable("vec_index") + i, prim.Variable(new_iname))) for i in range(4))),
tuple(prim.Subscript(prim.Variable(get_vector_view_name(quantity)), (vector_indices.get(vec_size) + i, prim.Variable(new_iname))) for i in range(4))),
depends_on=frozenset({'continue_stmt'}),
within_inames=common_inames.union(inames).union(frozenset({new_iname})),
within_inames_is_final=True,
......@@ -183,14 +192,14 @@ def collect_vector_data_rotate(knl):
assert isinstance(expr, prim.Subscript)
last_index = expr.index[-1]
replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
(prim.Variable("vec_index") + last_index, prim.Variable(new_iname)),
(vector_indices.get(vec_size) + last_index, prim.Variable(new_iname)),
)
else:
# Add a vector view to this quantity
expr, = quantities[quantity]
knl = add_vector_view(knl, quantity, flatview=True)
replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
(prim.Variable("vec_index"), prim.Variable(new_iname)),
(vector_indices.get(1), prim.Variable(new_iname)),
)
other_insns = [i for i in knl.instructions if i.id not in [j.id for j in insns + new_insns]]
......@@ -219,26 +228,6 @@ def collect_vector_data_rotate(knl):
id="update_total_index",
))
# Insert a flat consecutive counter 'vec_index', which is increased after a vector chunk is handled
temporaries['vec_index'] = lp.TemporaryVariable('vec_index', # name
dtype=np.int32,
scope=lp.temp_var_scope.PRIVATE,
)
new_insns.append(lp.Assignment(prim.Variable("vec_index"), # assignee
0, # expression
within_inames=common_inames,
within_inames_is_final=True,
id="assign_vec_index",
))
new_insns.append(lp.Assignment(prim.Variable("vec_index"), # assignee
prim.Sum((prim.Variable("vec_index"), vec_size if rotating else 1)), # expression
within_inames=common_inames.union(inames),
within_inames_is_final=True,
depends_on=frozenset({Tagged("vec_write"), "assign_vec_index"}),
depends_on_is_final=True,
id="update_vec_index",
))
# Insert a rotating index, that counts 0 , .. , vecsize - 1
temporaries['rotate_index'] = lp.TemporaryVariable('rotate_index', # name
dtype=np.int32,
......@@ -291,16 +280,19 @@ def collect_vector_data_rotate(knl):
lhsname = get_pymbolic_basename(insn.assignee)
knl = add_vector_view(knl, lhsname, pad_to=vec_size, flatview=True)
lhsname = get_vector_view_name(lhsname)
rotating = "gradvec" in insn.tags
if rotating:
assert isinstance(insn.assignee, prim.Subscript)
last_index = insn.assignee.index[-1]
assert last_index in tuple(range(4))
vec_index_size = vec_size
else:
last_index = 0
vec_index_size = 1
new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname),
(prim.Variable("vec_index") + last_index, prim.Variable(new_iname)),
(vector_indices.get(vec_index_size) + last_index, prim.Variable(new_iname)),
),
substitute(insn.expression, replacemap_vec),
depends_on=frozenset({"continue_stmt"}),
......@@ -316,12 +308,33 @@ def collect_vector_data_rotate(knl):
if rotating and "{}_rotateback".format(lhsname) not in [i.id for i in new_insns]:
new_insns.append(lp.CallInstruction((), # assignees
prim.Call(prim.Variable("transpose_reg"),
tuple(prim.Subscript(prim.Variable(lhsname), (prim.Variable("vec_index") + i, prim.Variable(new_iname))) for i in range(4))),
tuple(prim.Subscript(prim.Variable(lhsname), (vector_indices.get(vec_size) + i, prim.Variable(new_iname))) for i in range(4))),
depends_on=frozenset({Tagged("vec_write")}),
within_inames=common_inames.union(inames).union(frozenset({new_iname})),
within_inames_is_final=True,
id="{}_rotateback".format(lhsname),
))
# Add the necessary vector indices
for name, increment in vector_indices.needed:
temporaries[name] = lp.TemporaryVariable(name, # name
dtype=np.int32,
scope=lp.temp_var_scope.PRIVATE,
)
new_insns.append(lp.Assignment(prim.Variable(name), # assignee
0, # expression
within_inames=common_inames,
within_inames_is_final=True,
id="assign_{}".format(name),
))
new_insns.append(lp.Assignment(prim.Variable(name), # assignee
prim.Sum((prim.Variable(name), increment)), # expression
within_inames=common_inames.union(inames),
within_inames_is_final=True,
depends_on=frozenset({Tagged("vec_write"), "assign_{}".format(name)}),
depends_on_is_final=True,
id="update_{}".format(name),
))
from loopy.kernel.creation import resolve_dependencies
return resolve_dependencies(knl.copy(instructions=new_insns + other_insns))
......@@ -174,7 +174,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
expression=0,
forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
forced_iname_deps_is_final=True,
tags=frozenset(["quadvec"])
tags=frozenset(["quadvec", "gradvec"])
)
# Replace gradient iname with correct index for assignment
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment