diff --git a/python/dune/perftool/blockstructured/vectorization.py b/python/dune/perftool/blockstructured/vectorization.py index a5edfc73e82b2c468a6dece2d053014a33f45af5..8ebe9355593e9404bea6fd4fdc2a74161ad22a14 100644 --- a/python/dune/perftool/blockstructured/vectorization.py +++ b/python/dune/perftool/blockstructured/vectorization.py @@ -33,6 +33,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): vng = knl.get_var_name_generator() idg = knl.get_instruction_id_generator() new_vec_temporaries = dict() + vcl_size = get_vcl_type_size(np.float64) for insn in knl.instructions: # somehow CInstructions are not hashable.... if isinstance(insn, lp.MultiAssignmentBase) and insn in accum_insns: @@ -61,11 +62,11 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): identifier_b = vng('b') new_vec_temporaries[identifier_a] = DuneTemporaryVariable(identifier_a, dtype=np.float64, shape=(2,)*(world_dimension()-1) - +(get_vcl_type_size(np.float64),), + +(vcl_size,), managed=True, scope=lp.temp_var_scope.PRIVATE, dim_tags=('f',)*(world_dimension()-1)+('vec',)) new_vec_temporaries[identifier_b] = DuneTemporaryVariable(identifier_b, dtype=np.float64, - shape=(get_vcl_type_size(np.float64),), managed=True, + shape=(vcl_size,), managed=True, scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) var_a = prim.Subscript(prim.Variable(identifier_a), @@ -108,7 +109,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): # r+=a[iy] id_accum = idg('insn_mod_accum') expr_accum = prim.Sum((var_a, - prim.Call(prim.Variable('permute4d<-1,{}>'.format(','.join(map(str,range(get_vcl_type_size(np.float64)-1))))), + prim.Call(prim.Variable('permute{}d<-1,{}>'.format(vcl_size,','.join(map(str,range(vcl_size-1))))), (var_b,)), substitute(insn.assignee, {iname_ix:0}))) new_insns.append(lp.Assignment(assignee=substitute(insn.assignee,{iname_ix:0}), @@ -121,7 +122,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): ) # a[iy] = permute id_permute = idg('insn_permute') - expr_permute = prim.Call(prim.Variable('permute4d<3,{}>'.format(','.join(['-1']*(get_vcl_type_size(np.float64)-1)))), + expr_permute = prim.Call(prim.Variable('permute{}d<3,{}>'.format(vcl_size,','.join(['-1']*(vcl_size-1)))), (var_b,)) new_insns.append(lp.Assignment(assignee=var_a, expression=expr_permute, @@ -134,7 +135,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer): # tail handling, uses tail alias id_accum_tail = idg('insn_accum_tail') - subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")//get_vcl_type_size(np.float64), + subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")//vcl_size, iname_ix: 0, insn.assignee_name: prim.Variable(insn.assignee_name+'_tail'), **replace_tail_inames} assignee_tail = substitute(insn.assignee, subst_map)