diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index 36990176cd350fbf3d938ac185b715149a00314e..69ad01cb31f211066e074f7d3318e62fd57402ef 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -55,7 +55,7 @@ def get_temporary_name(): @generator_factory(item_tags=("temporary",), cache_key_generator=lambda n, **kw: n) def temporary_variable(name, **kwargs): from dune.perftool.loopy.temporary import DuneTemporaryVariable - return DuneTemporaryVariable(name, **kwargs) + return DuneTemporaryVariable(name, scope=loopy.temp_var_scope.LOCAL, **kwargs) # Now define generators for instructions. To ease dependency handling of instructions diff --git a/python/dune/perftool/loopy/collectvector.py b/python/dune/perftool/loopy/collectvector.py index b93eea31521321dcfa6943bf6be709f178f6a467..6de0d6622b2c4c391733bef1b7564e6cabe6c9f3 100644 --- a/python/dune/perftool/loopy/collectvector.py +++ b/python/dune/perftool/loopy/collectvector.py @@ -3,6 +3,8 @@ from dune.perftool.loopy.vcl import VCLLoad, VCLStore from dune.perftool.tools import get_pymbolic_basename +from loopy.symbolic import pw_aff_to_expr + from pymbolic.mapper.dependency import DependencyMapper from pymbolic.mapper.substitutor import substitute @@ -97,18 +99,9 @@ def collect_vector_data(knl, insns, inames, vec_size=4): # Assert some assumptions on the instructions # - # All instructions within the given inames are either to be vectorized - # or write a dependent quantity - for insn in other_insns + dep_insns: - len(insn.within_inames.intersection(inames) == 0) - # An instruction occurs in but one of these groups: assert len(set(insns + write_insns + dep_insns + other_insns)) == len(insns + write_insns + dep_insns + other_insns) - # All the target and write instructions are Assignments - for insn in insns + write_insns: - assert isinstance(insn, lp.Assignment) - # Analyse the inames of the given instructions and identify inames # that they all have in common. Those inames will also be iname dependencies # of inserted instruction. @@ -122,13 +115,12 @@ def collect_vector_data(knl, insns, inames, vec_size=4): # Insert a flat consecutive counter 'total_index' temporaries['total_index'] = lp.TemporaryVariable('total_index', # name dtype=np.int32, + scope=lp.temp_var_scope.LOCAL, ) new_insns.append(lp.Assignment(prim.Variable("total_index"), # assignee 0, # expression within_inames=common_inames, within_inames_is_final=True, - depends_on=frozenset(i.id for i in dep_insns), - depends_on_is_final=True, id="assign_total_index", )) new_insns.append(lp.Assignment(prim.Variable("total_index"), # assignee @@ -143,13 +135,12 @@ def collect_vector_data(knl, insns, inames, vec_size=4): # Insert a rotating index, that counts 0 , .. , vecsize - 1 temporaries['rotate_index'] = lp.TemporaryVariable('rotate_index', # name dtype=np.int32, + scope=lp.temp_var_scope.LOCAL, ) new_insns.append(lp.Assignment(prim.Variable("rotate_index"), # assignee 0, # expression within_inames=common_inames, within_inames_is_final=True, - depends_on=frozenset(i.id for i in dep_insns), - depends_on_is_final=True, id="assign_rotate_index", )) new_insns.append(lp.Assignment(prim.Variable("rotate_index"), # assignee @@ -177,6 +168,7 @@ def collect_vector_data(knl, insns, inames, vec_size=4): shape=(vec_size,), dim_tags="c", base_storage=quantity + '_base_storage', + scope=lp.temp_var_scope.LOCAL, ) vecname = quantity + '_buffered_vec' @@ -185,20 +177,32 @@ def collect_vector_data(knl, insns, inames, vec_size=4): shape=(vec_size,), dim_tags="vec", base_storage=quantity + '_base_storage', + scope=lp.temp_var_scope.LOCAL, ) replacemap_arr[quantity] = prim.Subscript(prim.Variable(arrname), (prim.Variable('rotate_index'),)) replacemap_vec[quantity_expr] = prim.Variable(vecname) for insn in write_insns: - new_insns.append(insn.copy(assignee=replacemap_arr[get_pymbolic_basename(insn.assignee)], - ) - ) + if isinstance(insn, lp.Assignment): + new_insns.append(insn.copy(assignee=replacemap_arr[get_pymbolic_basename(insn.assignee)], + ) + ) + if isinstance(insn, lp.CInstruction): + # TODO: What do we do about CInstructions? + # Example: detjac = ... + new_insns.append(insn) + + # Determine the condition for the continue statement + upper_bound = prim.Product(tuple(pw_aff_to_expr(knl.get_iname_bounds(i).size) for i in inames)) + total_check = prim.Comparison(prim.Variable("total_index"), "<", upper_bound) + rotate_check = prim.Comparison(prim.Variable("rotate_index"), "!=", 0) + check = prim.LogicalAnd((rotate_check, total_check)) # Insert the 'continue' statement new_insns.append(lp.CInstruction((), # iname exprs that the code needs access to "continue;", # the code - predicates=frozenset({"rotate_index != 0", "blubb"}), + predicates=frozenset({check}), depends_on=frozenset({"update_rotate_index", "update_total_index"}).union(frozenset([i.id for i in write_insns])), depends_on_is_final=True, within_inames=common_inames.union(inames), @@ -234,6 +238,7 @@ def collect_vector_data(knl, insns, inames, vec_size=4): shape=(vec_size,), dim_tags="vec", base_storage="{}_base".format(basename), + scope=lp.temp_var_scope.LOCAL, ) new_insns.append(insn.copy(assignee=prim.Variable(name), expression=substitute(insn.expression, variable_assignments=replacemap_vec), diff --git a/python/dune/perftool/loopy/vcl.py b/python/dune/perftool/loopy/vcl.py index 360fe5aaa4e6f8e740667ff68d0dc1e692b6e2b3..272e0ec7097a2ac07e0d19f1e1b439ed1a208da8 100644 --- a/python/dune/perftool/loopy/vcl.py +++ b/python/dune/perftool/loopy/vcl.py @@ -1,7 +1,9 @@ """ Our extensions to the loopy type system """ -from dune.perftool.generation import function_mangler +from dune.perftool.generation import (function_mangler, + include_file, + ) from loopy.symbolic import FunctionIdentifier from loopy.types import NumpyType @@ -82,4 +84,5 @@ class VCLStore(FunctionIdentifier): @function_mangler def vcl_mangler(target, func, dtypes): if isinstance(func, (VCLLoad, VCLStore)): + include_file("dune/perftool/vectorclass/vectorclass.h", filetag="operatorfile") return CallMangleInfo(func.name, (), (NumpyType(np.int32),)) diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py index 7ad15f1e1bf7ed72aa08fcdd732dcb780d5ee9a0..91e45c8ed0b031f9549e9e6b2fd942b57ab24f0b 100644 --- a/python/dune/perftool/sumfact/sumfact.py +++ b/python/dune/perftool/sumfact/sumfact.py @@ -200,6 +200,10 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): # Mark the transformation that moves the quadrature loop inside the trialfunction loops for application transform(nest_quadrature_loops, visitor.inames) + #TODO!!! + from dune.perftool.loopy.collectvector import collect_vector_data + transform(collect_vector_data, [contrib_dep], quadrature_inames()) + def sum_factorization_kernel(a_matrices, buf, insn_dep=frozenset({}), additional_inames=frozenset({})): """