diff --git a/patches/loopy/Current.patch b/patches/loopy/Current.patch index efb0a773a574162038fd29007ee06d11a1cd002d..95c08bde059e136e7413177af8349049971872d1 100644 --- a/patches/loopy/Current.patch +++ b/patches/loopy/Current.patch @@ -1,11 +1,11 @@ diff --git a/loopy/check.py b/loopy/check.py -index 2f48211..1446d07 100644 +index 7562eac..ac03be0 100644 --- a/loopy/check.py +++ b/loopy/check.py -@@ -286,10 +286,10 @@ class _AccessCheckMapper(WalkMapper): - +@@ -287,10 +287,10 @@ class _AccessCheckMapper(WalkMapper): + shape_domain = shape_domain.intersect(slab) - + - if not access_range.is_subset(shape_domain): - raise LoopyError("'%s' in instruction '%s' " - "accesses out-of-bounds array element" @@ -14,6 +14,31 @@ index 2f48211..1446d07 100644 +# raise LoopyError("'%s' in instruction '%s' " +# "accesses out-of-bounds array element" +# % (expr, self.insn_id)) - - + + def check_bounds(kernel): +@@ -620,7 +620,7 @@ def pre_codegen_checks(kernel): + + check_for_unused_hw_axes_in_insns(kernel) + check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel) +- check_that_temporaries_are_defined_in_subkernels_where_used(kernel) ++# check_that_temporaries_are_defined_in_subkernels_where_used(kernel) + check_that_all_insns_are_scheduled(kernel) + kernel.target.pre_codegen_check(kernel) + check_that_shapes_and_strides_are_arguments(kernel) +diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py +index c490abb..ec68921 100644 +--- a/loopy/codegen/instruction.py ++++ b/loopy/codegen/instruction.py +@@ -219,9 +219,9 @@ def generate_call_code(codegen_state, insn): + + # {{{ vectorization handling + +- if codegen_state.vectorization_info: +- if insn.atomicity: +- raise Unvectorizable("function call") ++# if codegen_state.vectorization_info: ++# if insn.atomicity: ++# raise Unvectorizable("function call") + + # }}} diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index 409b4c2af26b68f374ee264db64b409cd493e72b..510075c9f6ab9fa2173a554915f753cdb3553b86 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -300,4 +300,5 @@ def collect_vector_data_rotate(knl): id="{}_rotateback".format(lhsname), )) - return knl.copy(instructions=new_insns + other_insns) + from loopy.kernel.creation import resolve_dependencies + return resolve_dependencies(knl.copy(instructions=new_insns + other_insns)) diff --git a/python/loopy b/python/loopy index 5afb9fb2162561456697d325618a673314af0263..36c9bb5c0a5905022fc850c3efc5ad7661e5f897 160000 --- a/python/loopy +++ b/python/loopy @@ -1 +1 @@ -Subproject commit 5afb9fb2162561456697d325618a673314af0263 +Subproject commit 36c9bb5c0a5905022fc850c3efc5ad7661e5f897