diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index d702ab8cccc69f8b28f5c805ea5457e6b7e3adc5..bb8b2076a134f15fe925cec62cc283789c1b04cb 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -46,6 +46,13 @@ def collect_vector_data_rotate(knl, insns, inames): # TODO infer the numpy type here vec_size = get_vcl_type_size(np.float64) + # Add an iname to the kernel which will be used for vectorization + new_iname = "quad_vec_{}".format("_".join(inames)) + domain = "{{ [{0}] : 0<={0}<{1} }}".format(new_iname, str(vec_size)) + domain = parse_domains(domain, {}) + knl = knl.copy(domains=knl.domains + domain) + knl = lp.tag_inames(knl, [(new_iname, "vec")]) + # # Inspect the given instructions for dependent quantities # @@ -73,7 +80,7 @@ def collect_vector_data_rotate(knl, insns, inames): ) replacemap_arr[quantity] = prim.Subscript(prim.Variable(arrname), (prim.Variable('rotate_index'),)) - replacemap_vec[expr] = prim.Variable(get_vector_view_name(arrname)) + replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(arrname)), (0, prim.Variable(new_iname),)) write_match = lp.match.Or(tuple(lp.match.Writes(q) for q in quantities)) iname_match = lp.match.And(tuple(lp.match.Iname(i) for i in inames)) @@ -136,49 +143,38 @@ def collect_vector_data_rotate(knl, insns, inames): id="update_rotate_index", )) + # Determine the condition for the continue statement + upper_bound = prim.Product(tuple(pw_aff_to_expr(knl.get_iname_bounds(i).size) for i in inames)) + total_check = prim.Comparison(prim.Variable("total_index"), "<", upper_bound) + rotate_check = prim.Comparison(prim.Variable("rotate_index"), "!=", 0) + check = prim.LogicalAnd((rotate_check, total_check)) + + # Insert the 'continue' statement + new_insns.append(lp.CInstruction((), # iname exprs that the code needs access to + "continue;", # the code + predicates=frozenset({check}), + depends_on=frozenset({"update_rotate_index", "update_total_index"}).union(frozenset([i.id for i in write_insns])), + depends_on_is_final=True, + within_inames=common_inames.union(inames), + within_inames_is_final=True, + id="continue_stmt", + )) + # - # Construct a flat loop for the given instructions + # Reconstruct the compute instructions # -# new_insns = [] -# other_insns = [i for i in knl.instructions if i.id not in [j.id for j in insns]] -# -# size = prim.Product(tuple(pw_aff_to_expr(knl.get_iname_bounds(i).size) for i in inames)) -# size = prim.FloorDiv(size, vec_size) -# -# temporaries = knl.temporary_variables -# temporaries["flatsize"] = lp.TemporaryVariable("flatsize", -# dtype=np.int32, -# shape=(), -# ) -# new_insns.append(lp.Assignment(prim.Variable("flatsize"), -# size, -# ) -# ) -# -# # Add an additional domain to the kernel -# new_iname = "flat_{}".format("_".join(inames)) -# domain = "{{ [{0}] : 0<={0}<flatsize }}".format(new_iname, str(size)) -# domain = parse_domains(domain, {}) -# knl = knl.copy(domains=knl.domains + domain, -# temporary_variables=temporaries) -# -# # Split and tag the flat iname -# knl = lp.split_iname(knl, new_iname, vec_size, inner_tag="vec") -# new_inames = ("{}_outer".format(new_iname), "{}_inner".format(new_iname)) -# knl = lp.assume(knl, "flatsize mod {} = 0".format(vec_size)) -# -# for insn in insns: -# # Get a vector view of the lhs expression -# lhsname = get_pymbolic_basename(insn.assignee) -# knl = add_vector_view(knl, lhsname) -# lhsname = get_vector_view_name(lhsname) -# -# new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname), tuple(prim.Variable(i) for i in new_inames)), -# prim.Subscript(prim.Variable(get_vector_view_name("wk_precomputed")), tuple(prim.Variable(i) for i in new_inames)), -# within_inames=frozenset(new_inames), -# within_inames_is_final=True, -# ) -# ) + for insn in insns: + # Get a vector view of the lhs expression + lhsname = get_pymbolic_basename(insn.assignee) + knl = add_vector_view(knl, lhsname) + lhsname = get_vector_view_name(lhsname) + + new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname), (prim.FloorDiv(prim.Variable("total_size"), vec_size), prim.Variable(new_iname))), + substitute(insn.expression, replacemap_vec), + within_inames=frozenset(inames + (new_iname,)), + within_inames_is_final=True, + ) + ) return knl.copy(instructions=new_insns + other_insns) diff --git a/python/dune/perftool/loopy/transformations/vectorview.py b/python/dune/perftool/loopy/transformations/vectorview.py index dd36969bd57a0a26a697e897039e50ac024a60a1..5d72fb4d263e1217fc16cd7b1c95b22a71b5eec3 100644 --- a/python/dune/perftool/loopy/transformations/vectorview.py +++ b/python/dune/perftool/loopy/transformations/vectorview.py @@ -43,9 +43,18 @@ def add_vector_view(knl, tmpname): shape=(size, vecsize), base_storage=tmpname + "_base", dtype=np.float64, + scope=lp.temp_var_scope.PRIVATE, ) - return knl.copy(temporary_variables=temporaries) + # Avoid that any of these temporaries are eliminated + silenced = ['temp_to_write({})'.format(tmpname), + 'temp_to_write({})'.format(vecname), + 'read_no_write({})'.format(tmpname), + 'read_no_write({})'.format(vecname), + ] + + return knl.copy(temporary_variables=temporaries, + silenced_warnings=knl.silenced_warnings + silenced) def add_temporary_with_vector_view(knl, name, *args, **kwargs):