From 402f9e8ca4bd32e4e9d823fbf6add4c16eff675e Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Wed, 6 Dec 2017 15:47:56 +0100 Subject: [PATCH] More adaptations --- .../knl/poisson_dg/knl_poisson_dg.mini | 4 +- applications/knl/poisson_dg/verify.mini | 4 +- .../knl_poisson_dg_tensor.mini | 4 +- .../knl/poisson_dg_tensor/verify.mini | 4 +- applications/poisson_dg/poisson_dg.mini | 2 +- applications/poisson_dg_tensor/CMakeLists.txt | 2 - .../poisson_dg_tensor/poisson_dg_tensor.mini | 2 +- .../poisson_dg_tensor/sliced/CMakeLists.txt | 5 -- .../poisson_dg_tensor/sliced/sliced.mini | 49 ------------------- applications/stokes_dg/stokes_dg.mini | 2 +- python/dune/perftool/sumfact/vectorization.py | 8 +-- test/sumfact/mass/sliced.mini | 4 +- test/sumfact/poisson/diagonal.mini | 4 +- test/sumfact/poisson/sliced.mini | 4 +- 14 files changed, 29 insertions(+), 69 deletions(-) delete mode 100644 applications/poisson_dg_tensor/sliced/CMakeLists.txt delete mode 100644 applications/poisson_dg_tensor/sliced/sliced.mini diff --git a/applications/knl/poisson_dg/knl_poisson_dg.mini b/applications/knl/poisson_dg/knl_poisson_dg.mini index 0d8380d8..78b2fe03 100644 --- a/applications/knl/poisson_dg/knl_poisson_dg.mini +++ b/applications/knl/poisson_dg/knl_poisson_dg.mini @@ -39,7 +39,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/knl/poisson_dg/verify.mini b/applications/knl/poisson_dg/verify.mini index 07c5074b..d6dcdc85 100644 --- a/applications/knl/poisson_dg/verify.mini +++ b/applications/knl/poisson_dg/verify.mini @@ -12,7 +12,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 quadrature_order = 6 architecture = knl diff --git a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini index 95c6368f..d7dd0166 100644 --- a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini +++ b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini @@ -39,7 +39,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/knl/poisson_dg_tensor/verify.mini b/applications/knl/poisson_dg_tensor/verify.mini index 029acbe5..c2447c07 100644 --- a/applications/knl/poisson_dg_tensor/verify.mini +++ b/applications/knl/poisson_dg_tensor/verify.mini @@ -12,7 +12,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 quadrature_order = 6 architecture = knl diff --git a/applications/poisson_dg/poisson_dg.mini b/applications/poisson_dg/poisson_dg.mini index 2ba1c2cd..7bf6144b 100644 --- a/applications/poisson_dg/poisson_dg.mini +++ b/applications/poisson_dg/poisson_dg.mini @@ -39,7 +39,7 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_greedy = 1 +vectorization_strategy = explicit instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/poisson_dg_tensor/CMakeLists.txt b/applications/poisson_dg_tensor/CMakeLists.txt index 7e1a5438..27ae6ff2 100644 --- a/applications/poisson_dg_tensor/CMakeLists.txt +++ b/applications/poisson_dg_tensor/CMakeLists.txt @@ -4,8 +4,6 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl NO_TESTS ) -add_subdirectory(sliced) - dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl BASENAME verify_app_poisson_dg_tensor INIFILE verify.mini diff --git a/applications/poisson_dg_tensor/poisson_dg_tensor.mini b/applications/poisson_dg_tensor/poisson_dg_tensor.mini index d0c7251a..80986df2 100644 --- a/applications/poisson_dg_tensor/poisson_dg_tensor.mini +++ b/applications/poisson_dg_tensor/poisson_dg_tensor.mini @@ -39,7 +39,7 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_greedy = 1 +vectorization_strategy = explicit instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/poisson_dg_tensor/sliced/CMakeLists.txt b/applications/poisson_dg_tensor/sliced/CMakeLists.txt deleted file mode 100644 index fb516b3c..00000000 --- a/applications/poisson_dg_tensor/sliced/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -dune_add_formcompiler_system_test(UFLFILE ../poisson_dg_tensor.ufl - BASENAME app_poisson_dg_tensor_sliced - INIFILE sliced.mini - NO_TESTS - ) diff --git a/applications/poisson_dg_tensor/sliced/sliced.mini b/applications/poisson_dg_tensor/sliced/sliced.mini deleted file mode 100644 index 7522320a..00000000 --- a/applications/poisson_dg_tensor/sliced/sliced.mini +++ /dev/null @@ -1,49 +0,0 @@ -__name = app_poisson_dg_tensor_sliced_{__exec_suffix} -__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{formcompiler.instrumentation_level} - -opcount_suffix = opcount, nonopcount | expand opcount -{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude - -# Calculate the size of the grid to equlibritate it to 100 MB/rank -# Input parameters -dim = 3 -mbperrank = 100 -ranks = 32 -floatingbytes = 8 - -# Metaini Calculations -memperrank = {mbperrank} * 1048576 | eval -dofsperdir = {formcompiler.ufl_variants.degree} + 1 | eval -celldofs = {dofsperdir} ** {dim} | eval -cellsperrank = {memperrank} / ({floatingbytes} * {celldofs}) | eval -cellsperdir = {cellsperrank} ** (1/{dim}) | eval | toint -firstdircells = {ranks} * {cellsperdir} | eval -dimminusone = {dim} - 1 | eval -ones = 1 | repeat {dimminusone} -otherdircells = {cellsperdir} | repeat {dimminusone} - -# Setup the grid! -extension = 1.0 | repeat {dim} -cells = {firstdircells} {otherdircells} -partitioning = {ranks} {ones} - -# Set up the timing identifier -identifier = poisson_dg_tensor_deg{formcompiler.ufl_variants.degree} - -[wrapper.vtkcompare] -name = {__name} -extension = vtu - -[formcompiler] -fastdg = 1 -sumfact = 1 -vectorization_quadloop = 1 -vectorize_slice = 1 -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount -quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval - -[formcompiler.ufl_variants] -cell = hexahedron -degree = 3, 7 | expand diff --git a/applications/stokes_dg/stokes_dg.mini b/applications/stokes_dg/stokes_dg.mini index 2727253b..f6ae1d0f 100644 --- a/applications/stokes_dg/stokes_dg.mini +++ b/applications/stokes_dg/stokes_dg.mini @@ -40,7 +40,7 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_greedy = 1 +vectorization_strategy = explicit instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 1c2a7098..253b3528 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -226,9 +226,11 @@ def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already= while horizontal <= width: # Iterate over the possible combinations of sum factorization kernels # taking into account all the permutations of kernels. This also includes - # combinations which use a padding of 1. - for combo in it.chain(it.permutations(candidates, horizontal), - it.permutations(candidates, horizontal - 1)): + # combinations which use a padding of 1 - but only for pure horizontality. + generators = [it.permutations(candidates, horizontal)] + if horizontal >= 4: + generators.append(it.permutations(candidates, horizontal - 1)) + for combo in it.chain(*generators): # The chosen kernels must be part of the kernels for recursion # to work correctly if sf_to_decide not in combo: diff --git a/test/sumfact/mass/sliced.mini b/test/sumfact/mass/sliced.mini index 712d2d5e..90dab43e 100644 --- a/test/sumfact/mass/sliced.mini +++ b/test/sumfact/mass/sliced.mini @@ -11,7 +11,9 @@ extension = vtu [formcompiler] numerical_jacobian = 1 -vectorize_slice = 1 +vectorization_strategy = explicit +vectorization_horizontal = 1 +vectorization_vertical = 4 sumfact = 1 [formcompiler.ufl_variants] diff --git a/test/sumfact/poisson/diagonal.mini b/test/sumfact/poisson/diagonal.mini index d2687d64..d3744184 100644 --- a/test/sumfact/poisson/diagonal.mini +++ b/test/sumfact/poisson/diagonal.mini @@ -11,7 +11,9 @@ extension = vtu sumfact = 1 compare_l2errorsquared = 1e-5 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 2 +vectorization_vertical = 2 quadrature_order = 6, 6, 6 fastdg = 1 diff --git a/test/sumfact/poisson/sliced.mini b/test/sumfact/poisson/sliced.mini index 39e9e915..858b8c6b 100644 --- a/test/sumfact/poisson/sliced.mini +++ b/test/sumfact/poisson/sliced.mini @@ -11,7 +11,9 @@ extension = vtu sumfact = 1 compare_l2errorsquared = 1e-5 vectorization_quadloop = 1 -vectorize_slice = 1 +vectorization_strategy = explicit +vectorization_horizontal = 1 +vectorization_vertical = 4 quadrature_order = 6 [formcompiler.ufl_variants] -- GitLab