Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
dune-codegen
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Christian Heinigk
dune-codegen
Commits
ef9e2a22
Commit
ef9e2a22
authored
6 years ago
by
Marcel Koch
Browse files
Options
Downloads
Patches
Plain Diff
add dependencies to reduce loopy warnings
parent
21f86f45
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
python/dune/perftool/blockstructured/accumulation.py
+4
-1
4 additions, 1 deletion
python/dune/perftool/blockstructured/accumulation.py
python/dune/perftool/blockstructured/vectorization.py
+30
-10
30 additions, 10 deletions
python/dune/perftool/blockstructured/vectorization.py
with
34 additions
and
11 deletions
python/dune/perftool/blockstructured/accumulation.py
+
4
−
1
View file @
ef9e2a22
...
...
@@ -9,6 +9,8 @@ from dune.perftool.generation.loopy import function_mangler, globalarg
import
loopy
as
lp
import
pymbolic.primitives
as
prim
from
loopy.match
import
Writes
def
name_accumulation_alias
(
container
,
accumspace
):
name
=
container
+
"
_
"
+
accumspace
.
lfs
.
name
+
"
_alias
"
...
...
@@ -64,5 +66,6 @@ def generate_accumulation_instruction(expr, visitor):
forced_iname_deps
=
frozenset
(
lfs_inames
).
union
(
frozenset
(
quad_inames
)),
forced_iname_deps_is_final
=
True
,
predicates
=
predicates
,
tags
=
frozenset
({
'
accum
'
})
tags
=
frozenset
({
'
accum
'
}),
depends_on
=
frozenset
({
Writes
(
accumvar_alias
)})
)
This diff is collapsed.
Click to expand it.
python/dune/perftool/blockstructured/vectorization.py
+
30
−
10
View file @
ef9e2a22
...
...
@@ -2,17 +2,16 @@ import loopy as lp
import
numpy
as
np
import
pymbolic.primitives
as
prim
from
loopy.match
import
Tagged
,
Id
from
loopy.match
import
Tagged
,
Id
,
Writes
,
Or
from
dune.perftool.generation
import
get_global_context_value
from
dune.perftool.generation
import
get_global_context_value
,
silenced_warning
from
dune.perftool.loopy.target
import
dtype_floatingpoint
from
dune.perftool.loopy.temporary
import
DuneTemporaryVariable
from
dune.perftool.loopy.symbolic
import
substitute
from
dune.perftool.loopy.vcl
import
get_vcl_type_size
,
VCLPermute
,
VCLLoad
,
VCLStore
from
dune.perftool.options
import
get_form_option
from
dune.perftool.pdelab.argument
import
PDELabAccumulationFunction
from
dune.perftool.pdelab.geometry
import
world_dimension
from
dune.perftool.tools
import
get_pymbolic_
indices
from
dune.perftool.tools
import
get_pymbolic_
basename
def
add_vcl_temporaries
(
knl
):
...
...
@@ -152,10 +151,14 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
assignee_tail
=
substitute
(
insn
.
assignee
,
subst_map
)
expr_tail
=
prim
.
Sum
((
substitute
(
var_left
,
{
iname_inner
:
0
,
**
replace_tail_inames
}),
assignee_tail
))
write_to_tail_ids
=
tuple
(
i
.
id
for
i
in
lp
.
find_instructions
(
knl
,
Writes
(
get_pymbolic_basename
(
assignee_tail
))))
new_insns
.
append
(
lp
.
Assignment
(
assignee
=
assignee_tail
,
expression
=
expr_tail
,
id
=
id_accum_tail
,
depends_on
=
frozenset
({
id_accum
,
id_permute
,
id_set_left
,
id_init_a
}),
depends_on
=
frozenset
({
id_accum
,
id_permute
,
id_set_left
,
id_init_a
})
|
frozenset
(
write_to_tail_ids
),
within_inames
=
(
insn
.
within_inames
-
frozenset
({
iname_inner
,
iname_outer
})
-
inames_micro
)
|
inames_tail
,
tags
=
frozenset
({
'
tail
'
})))
...
...
@@ -170,6 +173,7 @@ def add_vcl_access(knl, iname_inner):
from
loopy.match
import
Reads
,
Tagged
accum_insns
=
set
((
insn
.
id
for
insn
in
lp
.
find_instructions
(
knl
,
Tagged
(
'
accum
'
))))
read_insns
=
set
((
insn
.
id
for
insn
in
lp
.
find_instructions
(
knl
,
Reads
(
'
*alias
'
))))
vectorized_insns
=
accum_insns
|
read_insns
from
loopy.symbolic
import
CombineMapper
from
loopy.symbolic
import
IdentityMapper
...
...
@@ -198,23 +202,29 @@ def add_vcl_access(knl, iname_inner):
aic
=
AliasIndexCollector
()
load_insns
=
[]
read_dependencies
=
dict
()
vectorized_insn_to_vector_names
=
dict
()
for
id
in
read_insns
:
insn
=
knl
.
id_to_insn
[
id
]
alias
,
index
=
aic
(
insn
.
expression
)
name_alias
=
alias
.
name
name_vec
=
name_alias
.
replace
(
'
alias
'
,
'
vec
'
)
vectorized_insn_to_vector_names
[
id
]
=
(
name_alias
,
name_vec
)
# compute index without vec iname
strides
=
tuple
(
tag
.
stride
for
tag
in
knl
.
arg_dict
[
name_alias
].
dim_tags
)
index
=
prim
.
Sum
(
tuple
(
prim
.
Product
((
i
,
s
))
for
i
,
s
in
zip
(
index
,
strides
)
if
i
!=
0
and
i
.
name
!=
iname_inner
))
# find write insns
write_ids
=
frozenset
(
i
.
id
for
i
in
lp
.
find_instructions
(
knl
,
Or
((
Writes
(
name_vec
),
Writes
(
name_vec
)))))
# add load instruction
load_id
=
idg
(
'
insn_
'
+
name_vec
+
'
_load
'
)
call_load
=
prim
.
Call
(
VCLLoad
(
name_vec
),
(
prim
.
Sum
((
prim
.
Variable
(
name_alias
),
index
)),))
load_insns
.
append
(
lp
.
CallInstruction
(
assignees
=
(),
expression
=
call_load
,
id
=
load_id
,
within_inames
=
insn
.
within_inames
|
insn
.
reduction_inames
(),))
id
=
load_id
,
within_inames
=
insn
.
within_inames
|
insn
.
reduction_inames
(),
depends_on
=
insn
.
depends_on
|
write_ids
,))
read_dependencies
.
setdefault
(
id
,
set
())
read_dependencies
[
id
].
add
(
load_id
)
...
...
@@ -226,18 +236,23 @@ def add_vcl_access(knl, iname_inner):
alias
,
index
=
aic
(
insn
.
expression
)
name_alias
=
alias
.
name
name_vec
=
name_alias
.
replace
(
'
alias
'
,
'
vec
'
)
vectorized_insn_to_vector_names
[
id
]
=
(
name_alias
,
name_vec
)
# flat index without vec iname
strides
=
tuple
(
tag
.
stride
for
tag
in
knl
.
arg_dict
[
name_alias
].
dim_tags
)
index
=
prim
.
Sum
(
tuple
(
prim
.
Product
((
i
,
s
))
for
i
,
s
in
zip
(
index
,
strides
)
if
i
!=
0
and
i
.
name
!=
iname_inner
))
# find write insns
write_ids
=
frozenset
(
i
.
id
for
i
in
lp
.
find_instructions
(
knl
,
Or
((
Writes
(
name_vec
),
Writes
(
name_vec
)))))
# add store instruction
store_id
=
idg
(
'
insn_
'
+
name_vec
+
'
_store
'
)
call_store
=
prim
.
Call
(
VCLStore
(
name_vec
),
(
prim
.
Sum
((
prim
.
Variable
(
name_alias
),
index
)),))
store_insns
.
append
(
lp
.
CallInstruction
(
assignees
=
(),
expression
=
call_store
,
id
=
store_id
,
within_inames
=
insn
.
within_inames
,
depends_on
=
insn
.
depends_on
|
frozenset
({
id
})
|
read_dependencies
[
id
]))
depends_on
=
insn
.
depends_on
|
frozenset
({
id
})
|
read_dependencies
[
id
]
|
write_ids
))
# replace alias with vcl vector, except for accumulation assignee
vector_alias
=
[
a
for
a
in
knl
.
arg_dict
if
a
.
endswith
(
'
alias
'
)]
...
...
@@ -279,9 +294,12 @@ def add_vcl_access(knl, iname_inner):
# add store and load dependencies and set right accumulation assignee
new_insns
=
[]
for
insn
in
knl
.
instructions
:
if
insn
.
id
not
in
read_insns
|
accum
_insns
:
if
insn
.
id
not
in
vectorized
_insns
:
new_insns
.
append
(
insn
)
else
:
# find write insns
name_alias
,
name_vec
=
vectorized_insn_to_vector_names
[
insn
.
id
]
write_ids
=
frozenset
(
i
.
id
for
i
in
lp
.
find_instructions
(
knl
,
Or
((
Writes
(
name_vec
),
Writes
(
name_vec
)))))
if
insn
.
id
in
accum_insns
:
assignee_alias
=
insn
.
assignee
try
:
...
...
@@ -293,9 +311,11 @@ def add_vcl_access(knl, iname_inner):
from
dune.perftool.error
import
PerftoolVectorizationError
raise
PerftoolVectorizationError
new_insns
.
append
(
insn
.
copy
(
assignee
=
assignee_vec
,
depends_on
=
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]))
depends_on
=
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]
|
write_ids
))
else
:
new_insns
.
append
(
insn
.
copy
(
depends_on
=
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]))
new_insns
.
append
(
insn
.
copy
(
depends_on
=
insn
.
depends_on
|
read_dependencies
[
insn
.
id
]
|
write_ids
))
return
knl
.
copy
(
instructions
=
new_insns
+
load_insns
+
store_insns
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment