Skip to content
Snippets Groups Projects
Commit 2c0878ee authored by Marcel Koch's avatar Marcel Koch
Browse files

make vectorization functions explicitly dependent on vector size

parent 55cf2cac
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@ import loopy as lp
import numpy as np
import pymbolic.primitives as prim
from loopy.match import Tagged, Id, Writes, And, Or, Iname, All
from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All
from islpy import BasicSet
from dune.codegen.generation import get_global_context_value
......@@ -15,25 +15,25 @@ from dune.codegen.pdelab.geometry import world_dimension
from dune.codegen.tools import get_pymbolic_basename
def add_vcl_temporaries(knl):
def add_vcl_temporaries(knl, vcl_size):
vector_alias = [a for a in knl.arg_dict if a.endswith('alias')]
# add new temporaries for vectors
# hope one read insn doesn't have two different reads from the same temporary
new_vec_temporaries = dict()
new_insns = []
init_iname = 'init_vec'
init_iname = 'init_vec{}'.format(vcl_size)
from islpy import BasicSet
init_domain = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(init_iname, get_vcl_type_size(dtype_floatingpoint())))
for alias in vector_alias:
vector_name = alias.replace('alias', 'vec')
vector_name = alias.replace('alias', 'vec{}'.format(vcl_size))
new_vec_temporaries[vector_name] = DuneTemporaryVariable(vector_name, dtype=np.float64,
shape=(get_vcl_type_size(np.float64),), managed=True,
shape=(vcl_size,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
# write once to the vector such that loopy won't complain
new_insns.append(lp.Assignment(assignee=prim.Subscript(prim.Variable(vector_name), prim.Variable(init_iname)),
expression=0, within_inames=frozenset({init_iname}),
id='init_' + vector_name))
id='init_{}'.format(vector_name)))
from loopy.kernel.data import VectorizeTag
return knl.copy(instructions=knl.instructions + new_insns, domains=knl.domains + [init_domain],
......@@ -41,9 +41,8 @@ def add_vcl_temporaries(knl):
iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()}))
def add_vcl_accum_insns(knl, iname_inner, iname_outer):
def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size):
nptype = dtype_floatingpoint()
vcl_size = get_vcl_type_size(np.float64)
accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner))))
accum_ids = [insn.id for insn in accum_insns]
......@@ -59,7 +58,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee))
inames_micro = set((i for i in insn.within_inames if i.startswith('micro')))
iname_ix = next((i for i in inames_micro if i.endswith("_x")))
iname_ix = next((i for i in inames_micro if '_x' in i))
# need inames for head and tail handling a priori
from loopy.match import Not, All
......@@ -76,8 +75,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
inames_tail = frozenset((var.name for var in replace_tail_inames.values()))
# erstelle a[iy] und b
identifier_left = vng('left_node')
identifier_right = vng('right_node')
identifier_left = vng('left_node_vec{}'.format(vcl_size))
identifier_right = vng('right_node_vec{}'.format(vcl_size))
new_vec_temporaries[identifier_left] = DuneTemporaryVariable(identifier_left, dtype=np.float64,
shape=(2,) * (world_dimension() - 1) +
(vcl_size,),
......@@ -100,7 +99,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
id=id_init_a,
within_inames=(insn.within_inames - frozenset({iname_outer}) -
inames_micro) | inames_head,
tags=frozenset({'head'})))
tags=frozenset({'head_vec{}'.format(vcl_size)})))
# setze werte für a und b
expr_right = substitute(expr_without_r, {iname_ix: 1})
......@@ -131,7 +130,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
depends_on=insn.depends_on | frozenset({id_set_left,
id_init_a, id_set_right}),
within_inames=insn.within_inames - frozenset({iname_ix}),
tags=frozenset({'accum'})))
tags=frozenset({'accum_vec{}'.format(vcl_size)})))
# a[iy] = permute
id_permute = idg('{}_permute'.format(insn.id))
expr_permute = prim.Call(VCLPermute(nptype, vcl_size, (vcl_size - 1,) + (-1,) * (vcl_size - 1)),
......@@ -162,7 +161,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
frozenset(write_to_tail_ids)),
within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer}) -
inames_micro) | inames_tail,
tags=frozenset({'tail'})))
tags=frozenset({'tail_vec{}'.format(vcl_size)})))
else:
if insn.id.endswith('tail') and insn.id.replace('_tail', '') in accum_ids:
accum_id = insn.id.replace('_tail', '')
......@@ -174,12 +173,16 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries))
def add_vcl_access(knl, iname_inner):
def add_vcl_access(knl, iname_inner, vcl_size):
from loopy.match import Reads, Tagged
accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner))))))
accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)),
Iname(iname_inner))))))
read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner))))))
vectorized_insns = accum_insns | read_insns
alias_suffix = 'alias'
vector_sufix = 'vec{}'.format(vcl_size)
from loopy.symbolic import CombineMapper
from loopy.symbolic import IdentityMapper
......@@ -195,7 +198,7 @@ def add_vcl_access(knl, iname_inner):
map_loopy_function_identifier = map_constant
def map_subscript(self, expr):
if expr.aggregate.name.endswith('alias'):
if expr.aggregate.name.endswith(alias_suffix):
return expr.aggregate, expr.index_tuple
else:
return tuple()
......@@ -213,7 +216,7 @@ def add_vcl_access(knl, iname_inner):
alias, index = aic(insn.expression)
name_alias = alias.name
name_vec = name_alias.replace('alias', 'vec')
name_vec = name_alias.replace(alias_suffix, vector_sufix)
vectorized_insn_to_vector_names[id] = (name_alias, name_vec)
# compute index without vec iname
......@@ -240,7 +243,7 @@ def add_vcl_access(knl, iname_inner):
alias, index = aic(insn.expression)
name_alias = alias.name
name_vec = name_alias.replace('alias', 'vec')
name_vec = name_alias.replace(alias_suffix, vector_sufix)
vectorized_insn_to_vector_names[id] = (name_alias, name_vec)
# flat index without vec iname
......@@ -260,7 +263,7 @@ def add_vcl_access(knl, iname_inner):
write_ids)))
# replace alias with vcl vector, except for accumulation assignee
vector_alias = [a for a in knl.arg_dict if a.endswith('alias')]
vector_alias = [a for a in knl.arg_dict if a.endswith(alias_suffix)]
dim = world_dimension()
dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)]
# remove CInstructions since loopy extract expects to get only assignments
......@@ -287,7 +290,8 @@ def add_vcl_access(knl, iname_inner):
parameters=parameters)
new_subst = knl_with_subst_insns.substitutions.copy()
rule = new_subst[alias + '_subst']
rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),))
rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)),
(prim.Variable('ex_i'),))
knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst)
knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner))
......@@ -308,7 +312,7 @@ def add_vcl_access(knl, iname_inner):
try:
assignee_vec = next((expr for expr in insn.expression.children
if isinstance(expr, prim.Subscript) and
expr.aggregate.name.replace('vec', 'alias') ==
expr.aggregate.name.replace(vector_sufix, alias_suffix) ==
assignee_alias.aggregate.name.replace('dummy_', '')))
except StopIteration:
from dune.codegen.error import CodegenVectorizationError
......@@ -431,10 +435,10 @@ def vectorize_micro_elements(knl):
if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell':
vcl_size = get_vcl_type_size(np.float64)
knl = add_iname_array(knl, vec_iname)
has_tail = get_form_option('number_of_blocks') % vcl_size > 0
# manually add tail, since split_iname with slabs tries to vectorize the tail
if get_form_option('number_of_blocks') % vcl_size > 0:
if has_tail:
vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size
from loopy.kernel.tools import DomainChanger
domch = DomainChanger(knl, (vec_iname,))
......@@ -448,12 +452,14 @@ def vectorize_micro_elements(knl):
knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
knl = add_iname_array(knl, vec_iname)
array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')]
knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size)
knl = lp.tag_array_axes(knl, iname_vector, ('c', 'vec'))
knl = add_vcl_temporaries(knl)
knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer')
knl = add_vcl_access(knl, vec_iname + '_inner')
knl = add_vcl_temporaries(knl, vcl_size)
knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
knl = add_vcl_access(knl, vec_iname + '_inner', vcl_size)
return knl
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment