diff --git a/applications/knl/poisson_dg/knl_poisson_dg.mini b/applications/knl/poisson_dg/knl_poisson_dg.mini index 0d8380d821b204d3b122c648a08943f24488fb48..78b2fe03adb632d2a301388342865bda20d71290 100644 --- a/applications/knl/poisson_dg/knl_poisson_dg.mini +++ b/applications/knl/poisson_dg/knl_poisson_dg.mini @@ -39,7 +39,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/knl/poisson_dg/verify.mini b/applications/knl/poisson_dg/verify.mini index 07c5074b1d780074763605d29a12934d69a8a99f..d6dcdc8532a2809b4842ee2d078d72f7f20ff97a 100644 --- a/applications/knl/poisson_dg/verify.mini +++ b/applications/knl/poisson_dg/verify.mini @@ -12,7 +12,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 quadrature_order = 6 architecture = knl diff --git a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini index 95c6368f7add85f69a432635438da01a037ff490..d7dd0166279594103b51652059566af89cd9850f 100644 --- a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini +++ b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini @@ -39,7 +39,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/knl/poisson_dg_tensor/verify.mini b/applications/knl/poisson_dg_tensor/verify.mini index 029acbe57eae0a7bb0b2291ce5906369f45fb55e..c2447c077f3ad00c39585b846e57b01b19ef23f3 100644 --- a/applications/knl/poisson_dg_tensor/verify.mini +++ b/applications/knl/poisson_dg_tensor/verify.mini @@ -12,7 +12,9 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 4 +vectorization_vertical = 2 quadrature_order = 6 architecture = knl diff --git a/applications/poisson_dg/poisson_dg.mini b/applications/poisson_dg/poisson_dg.mini index 2ba1c2cd1b9392990ef4022cd43095ec5ffb489b..7bf6144ba013d3aa8808ccf0994a0d706bf8e1aa 100644 --- a/applications/poisson_dg/poisson_dg.mini +++ b/applications/poisson_dg/poisson_dg.mini @@ -39,7 +39,7 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_greedy = 1 +vectorization_strategy = explicit instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/poisson_dg_tensor/CMakeLists.txt b/applications/poisson_dg_tensor/CMakeLists.txt index 7e1a54384e8758349000d496f9036b96a7a85e7e..27ae6ff295a83acfd889207e6e8fc266de33c18e 100644 --- a/applications/poisson_dg_tensor/CMakeLists.txt +++ b/applications/poisson_dg_tensor/CMakeLists.txt @@ -4,8 +4,6 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl NO_TESTS ) -add_subdirectory(sliced) - dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl BASENAME verify_app_poisson_dg_tensor INIFILE verify.mini diff --git a/applications/poisson_dg_tensor/poisson_dg_tensor.mini b/applications/poisson_dg_tensor/poisson_dg_tensor.mini index d0c7251aad61e024213e3a8215c4ae48f083c5a6..80986df2a842f009ab773cdf80b6219b8b98ce95 100644 --- a/applications/poisson_dg_tensor/poisson_dg_tensor.mini +++ b/applications/poisson_dg_tensor/poisson_dg_tensor.mini @@ -39,7 +39,7 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_greedy = 1 +vectorization_strategy = explicit instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/applications/poisson_dg_tensor/sliced/CMakeLists.txt b/applications/poisson_dg_tensor/sliced/CMakeLists.txt deleted file mode 100644 index fb516b3cae2c116dfd8ce5cd0ee09dd238a2deae..0000000000000000000000000000000000000000 --- a/applications/poisson_dg_tensor/sliced/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -dune_add_formcompiler_system_test(UFLFILE ../poisson_dg_tensor.ufl - BASENAME app_poisson_dg_tensor_sliced - INIFILE sliced.mini - NO_TESTS - ) diff --git a/applications/poisson_dg_tensor/sliced/sliced.mini b/applications/poisson_dg_tensor/sliced/sliced.mini deleted file mode 100644 index 7522320ab62cf52fc5af6bb4225363735c675122..0000000000000000000000000000000000000000 --- a/applications/poisson_dg_tensor/sliced/sliced.mini +++ /dev/null @@ -1,49 +0,0 @@ -__name = app_poisson_dg_tensor_sliced_{__exec_suffix} -__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{formcompiler.instrumentation_level} - -opcount_suffix = opcount, nonopcount | expand opcount -{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude - -# Calculate the size of the grid to equlibritate it to 100 MB/rank -# Input parameters -dim = 3 -mbperrank = 100 -ranks = 32 -floatingbytes = 8 - -# Metaini Calculations -memperrank = {mbperrank} * 1048576 | eval -dofsperdir = {formcompiler.ufl_variants.degree} + 1 | eval -celldofs = {dofsperdir} ** {dim} | eval -cellsperrank = {memperrank} / ({floatingbytes} * {celldofs}) | eval -cellsperdir = {cellsperrank} ** (1/{dim}) | eval | toint -firstdircells = {ranks} * {cellsperdir} | eval -dimminusone = {dim} - 1 | eval -ones = 1 | repeat {dimminusone} -otherdircells = {cellsperdir} | repeat {dimminusone} - -# Setup the grid! -extension = 1.0 | repeat {dim} -cells = {firstdircells} {otherdircells} -partitioning = {ranks} {ones} - -# Set up the timing identifier -identifier = poisson_dg_tensor_deg{formcompiler.ufl_variants.degree} - -[wrapper.vtkcompare] -name = {__name} -extension = vtu - -[formcompiler] -fastdg = 1 -sumfact = 1 -vectorization_quadloop = 1 -vectorize_slice = 1 -instrumentation_level = 2, 3, 4 | expand -opcounter = 1, 0 | expand opcount -time_opcounter = 0, 1 | expand opcount -quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval - -[formcompiler.ufl_variants] -cell = hexahedron -degree = 3, 7 | expand diff --git a/applications/stokes_dg/stokes_dg.mini b/applications/stokes_dg/stokes_dg.mini index 2727253b46dc9a0bcbe9f23116461dd896cff9ac..f6ae1d0f5f77e5a83e0634ee3a39cea8edf70dcd 100644 --- a/applications/stokes_dg/stokes_dg.mini +++ b/applications/stokes_dg/stokes_dg.mini @@ -40,7 +40,7 @@ extension = vtu fastdg = 1 sumfact = 1 vectorization_quadloop = 1 -vectorize_greedy = 1 +vectorization_strategy = explicit instrumentation_level = 2, 3, 4 | expand opcounter = 1, 0 | expand opcount time_opcounter = 0, 1 | expand opcount diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 1c2a7098e7092e8cd36047658fd98d264c0fb033..253b35281587641ef2d32dcb8c68277ec0d7126c 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -226,9 +226,11 @@ def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already= while horizontal <= width: # Iterate over the possible combinations of sum factorization kernels # taking into account all the permutations of kernels. This also includes - # combinations which use a padding of 1. - for combo in it.chain(it.permutations(candidates, horizontal), - it.permutations(candidates, horizontal - 1)): + # combinations which use a padding of 1 - but only for pure horizontality. + generators = [it.permutations(candidates, horizontal)] + if horizontal >= 4: + generators.append(it.permutations(candidates, horizontal - 1)) + for combo in it.chain(*generators): # The chosen kernels must be part of the kernels for recursion # to work correctly if sf_to_decide not in combo: diff --git a/test/sumfact/mass/sliced.mini b/test/sumfact/mass/sliced.mini index 712d2d5ef414acb35a09947f0f9e77aae5811865..90dab43e70b8ddc38830c37afb2dd83b4116f5e7 100644 --- a/test/sumfact/mass/sliced.mini +++ b/test/sumfact/mass/sliced.mini @@ -11,7 +11,9 @@ extension = vtu [formcompiler] numerical_jacobian = 1 -vectorize_slice = 1 +vectorization_strategy = explicit +vectorization_horizontal = 1 +vectorization_vertical = 4 sumfact = 1 [formcompiler.ufl_variants] diff --git a/test/sumfact/poisson/diagonal.mini b/test/sumfact/poisson/diagonal.mini index d2687d6469fc7a67913754f1ee91c72641909539..d3744184c52abd1320aa796bc16249c478afe9a7 100644 --- a/test/sumfact/poisson/diagonal.mini +++ b/test/sumfact/poisson/diagonal.mini @@ -11,7 +11,9 @@ extension = vtu sumfact = 1 compare_l2errorsquared = 1e-5 vectorization_quadloop = 1 -vectorize_diagonal = 1 +vectorization_strategy = explicit +vectorization_horizontal = 2 +vectorization_vertical = 2 quadrature_order = 6, 6, 6 fastdg = 1 diff --git a/test/sumfact/poisson/sliced.mini b/test/sumfact/poisson/sliced.mini index 39e9e915afb8b31495b3ed715265a72d5fa08555..858b8c6b6b8804f3cede25236ff29ce66bae010b 100644 --- a/test/sumfact/poisson/sliced.mini +++ b/test/sumfact/poisson/sliced.mini @@ -11,7 +11,9 @@ extension = vtu sumfact = 1 compare_l2errorsquared = 1e-5 vectorization_quadloop = 1 -vectorize_slice = 1 +vectorization_strategy = explicit +vectorization_horizontal = 1 +vectorization_vertical = 4 quadrature_order = 6 [formcompiler.ufl_variants]