diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index 9004b2f2bb946ecc0d328f933d919205a174ab10..d633843adf0d7a67919b99ff8b45301c39c0ba3d 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -167,7 +167,7 @@ def collect_vector_data_rotate(knl): # # 1. Rotating the input data - knl = add_vector_view(knl, quantity) + knl = add_vector_view(knl, quantity, flatview=True) include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile") new_insns.append(lp.CallInstruction((), # assignees prim.Call(prim.Variable("transpose_reg"), @@ -289,7 +289,7 @@ def collect_vector_data_rotate(knl): for insn in insns: # Get a vector view of the lhs expression lhsname = get_pymbolic_basename(insn.assignee) - knl = add_vector_view(knl, lhsname, pad_to=vec_size) + knl = add_vector_view(knl, lhsname, pad_to=vec_size, flatview=True) lhsname = get_vector_view_name(lhsname) if rotating: diff --git a/python/dune/perftool/loopy/transformations/vectorview.py b/python/dune/perftool/loopy/transformations/vectorview.py index e0d78e140975d5b0637272597487b609b05777a3..1450b26e39bd26304322175cce53625666f919df 100644 --- a/python/dune/perftool/loopy/transformations/vectorview.py +++ b/python/dune/perftool/loopy/transformations/vectorview.py @@ -16,7 +16,7 @@ def get_vector_view_name(tmpname): return tmpname + "_vec" -def add_vector_view(knl, tmpname, pad_to=None): +def add_vector_view(knl, tmpname, pad_to=None, flatview=False): """ Kernel transformation to add a vector view temporary that interprets the same memory as another temporary @@ -53,11 +53,27 @@ def add_vector_view(knl, tmpname, pad_to=None): if pad_to: size = (size // pad_to + 1) * pad_to + # Some vectorview are intentionally flat! (e.g. 
the output buffers of + # sum factorization kernels) + if flatview: + shape = (size, vecsize) + dim_tags = "c,vec" + else: + assert(temp.shape[-1] == vecsize) + shape = temp.shape + # This works around a loopy weirdness (which might as well be a bug) + # TODO: investigate this! + if len(shape) == 1: + shape = (1, vecsize) + dim_tags = "c,vec" + else: + dim_tags = temp.dim_tags[:-1] + ("vec",) + # Now add a vector view temporary vecname = tmpname + "_vec" temporaries[vecname] = lp.TemporaryVariable(vecname, - dim_tags="c,vec", - shape=(size, vecsize), + dim_tags=dim_tags, + shape=shape, base_storage=bsname, dtype=np.float64, scope=lp.temp_var_scope.PRIVATE,