Skip to content
Snippets Groups Projects
Commit 13ab3cc0 authored by Marcel Koch's avatar Marcel Koch
Browse files

recursive vectorization

parent c14f4f78
No related branches found
No related tags found
No related merge requests found
......@@ -186,11 +186,17 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
from loopy.symbolic import IdentityMapper
class AliasIndexCollector(CombineMapper, IdentityMapper):
def __init__(self):
self.found_alias = False
def combine(self, values):
return sum(values, tuple())
def map_constant(self, expr):
return tuple()
if self.found_alias:
return (expr,)
else:
return tuple()
map_variable = map_constant
map_function_symbol = map_constant
......@@ -198,7 +204,10 @@ def add_vcl_access(knl, inner_iname, vcl_size, level=0):
def map_subscript(self, expr):
if expr.aggregate.name.endswith(alias_suffix):
return expr.aggregate, expr.index_tuple
self.found_alias = True
indices = self.combine((self.rec(index) for index in expr.index_tuple))
self.found_alias = False
return expr.aggregate, indices
else:
return tuple()
......@@ -467,50 +476,48 @@ def vectorize_micro_elements(knl):
orig_iname = vec_iname
if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell':
vcl_size = get_vcl_type_size(np.float64)
knl = add_iname_array(knl, vec_iname)
has_tail = get_form_option('number_of_blocks') % vcl_size > 0
vectorize_tail = True
def _do_vectorization(knl, vec_iname, vcl_size, level=0):
inner_iname = vec_iname + '_inner'
outer_iname = vec_iname + '_outer'
knl = add_iname_array(knl, vec_iname)
has_tail = get_form_option('number_of_blocks') % vcl_size > 0
vectorize_tail = (vcl_size // 2) > 1 and \
(get_form_option('number_of_blocks') % vcl_size) % (vcl_size // 2) == 0
# manually add tail, since split_iname with slabs tries to vectorize the tail
if has_tail:
vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size
from loopy.kernel.tools import DomainChanger
domch = DomainChanger(knl, (vec_iname,))
knl = knl.copy(domains=domch.get_domains_with(
BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
# manually add tail, since split_iname with slabs tries to vectorize the tail
if has_tail:
# fake suitable loop bound
vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size
from loopy.kernel.tools import DomainChanger
domch = DomainChanger(knl, (vec_iname,))
knl = knl.copy(domains=domch.get_domains_with(
BasicSet('{{ [{0}]: 0<={0}<{1} }}'.format(vec_iname, vectorizable_bound))))
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = realize_tail(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
knl = lp.split_iname(knl, vec_iname, vcl_size, outer_iname=outer_iname, inner_iname=inner_iname)
tail_iname = vec_iname + '_inner' + '_tail'
else:
knl = lp.split_iname(knl, vec_iname, vcl_size)
tail_iname = vec_iname + '_inner' + '_tail'
knl = realize_tail(knl, inner_iname, outer_iname, tail_iname, vcl_size)
else:
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
knl = lp.tag_inames(knl, [(inner_iname, 'vec')])
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
knl = lp.split_array_axis(knl, array_alias, 0, vcl_size)
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
knl = lp.split_array_axis(knl, array_alias, level, vcl_size)
def _do_vectorization(knl, iname_inner, iname_outer, vcl_size, levels=0):
knl = add_vcl_iname_array(knl, orig_iname, iname_inner, vcl_size)
knl = add_vcl_iname_array(knl, orig_iname, inner_iname, vcl_size)
knl = add_vcl_temporaries(knl, vcl_size)
knl = add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size)
knl = add_vcl_access(knl, iname_inner, vcl_size, levels)
return knl
knl = add_vcl_accum_insns(knl, inner_iname, outer_iname, vcl_size)
knl = add_vcl_access(knl, inner_iname, vcl_size, level)
knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
if has_tail and vectorize_tail:
vcl_size = vcl_size // 2
vec_iname = tail_iname
if has_tail and vectorize_tail:
from pudb import set_trace; set_trace()
knl = _do_vectorization(knl, tail_iname, vcl_size // 2, level + 1)
knl = lp.split_iname(knl, vec_iname, vcl_size)
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
knl = lp.split_array_axis(knl, array_alias, 1, vcl_size)
return knl
knl = _do_vectorization(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size, levels=1)
knl = _do_vectorization(knl, orig_iname, vcl_size)
return knl
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment