From 6cb3a57e670cd445c5b4db793b2855e68a599ab6 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Thu, 6 Apr 2017 13:54:38 +0200 Subject: [PATCH] First implementation of vertical vectorization --- python/dune/perftool/sumfact/tabulation.py | 2 +- python/dune/perftool/sumfact/vectorization.py | 39 +++++++++++++++++-- test/sumfact/mass/CMakeLists.txt | 5 +++ test/sumfact/mass/mass_3d.mini | 3 ++ test/sumfact/mass/mass_3d.ufl | 2 +- 5 files changed, 46 insertions(+), 5 deletions(-) diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py index d6519699..71cadb75 100644 --- a/python/dune/perftool/sumfact/tabulation.py +++ b/python/dune/perftool/sumfact/tabulation.py @@ -56,7 +56,7 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord): if quadrature_size is None: quadrature_size = quadrature_points_per_direction() if slice_size is not None: - quadrature_size = ceildiv(quadrature_size, slize_size) + quadrature_size = ceildiv(quadrature_size, slice_size) if basis_size is None: basis_size = basis_functions_per_direction() ImmutableRecord.__init__(self, diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 460ae1e9..4e709b39 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -41,6 +41,35 @@ def no_vectorization(sumfacts): input=get_counted_variable("input"))) +def vertical_vectorization_strategy(sumfact, depth): + # For the sake of simplicity we restrict ourselves to stage 1 so far + if sumfact.stage == 1: + # Assert that this is not already sliced + assert all(mat.slice_size is None for mat in sumfact.matrix_sequence) + + # Determine which of the matrices in the kernel should be sliced + sliced = None + for i, mat in enumerate(sumfact.matrix_sequence): + if mat.quadrature_size % depth == 0: + sliced = i + + # Currently we assume that this function is always able 
to find a matrix to slice! + assert sliced is not None + + kernels = [] + oldtab = sumfact.matrix_sequence[sliced] + for i in range(depth): + seq = list(sumfact.matrix_sequence) + seq[sliced] = oldtab.copy(slice_size=depth, + slice_index=i) + kernels.append(sumfact.copy(matrix_sequence=tuple(seq))) + + vsf = VectorizedSumfactKernel(kernels=tuple(kernels)) + return _cache_vectorization_info(sumfact, vsf) + else: + return _cache_vectorization_info(sumfact, sumfact) + + def horizontal_vectorization_strategy(sumfacts): width = get_vcl_type_size(np.float64) # We currently heuristically allow horizontal vectorization if the number @@ -95,12 +124,16 @@ def decide_vectorization_strategy(): from dune.perftool.generation import retrieve_cache_items sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")] - if not get_option("vectorize_grads"): - no_vectorization(sumfacts) - else: + if get_option("vectorize_grads"): # Currently we base our idea here on the fact that we only group sum # factorization kernels with the same input. 
inputkeys = set(sf.input_key for sf in sumfacts) for inputkey in inputkeys: sumfact_filter = [sf for sf in sumfacts if sf.input_key == inputkey] horizontal_vectorization_strategy(sumfact_filter) + elif get_option("vectorize_slice"): + for sumfact in sumfacts: + width = get_vcl_type_size(np.float64) + vertical_vectorization_strategy(sumfact, width) + else: + no_vectorization(sumfacts) diff --git a/test/sumfact/mass/CMakeLists.txt b/test/sumfact/mass/CMakeLists.txt index a640f020..a1313988 100644 --- a/test/sumfact/mass/CMakeLists.txt +++ b/test/sumfact/mass/CMakeLists.txt @@ -8,3 +8,8 @@ dune_add_formcompiler_system_test(UFLFILE mass_3d.ufl BASENAME sumfact_mass_3d INIFILE mass_3d.mini ) + +dune_add_formcompiler_system_test(UFLFILE mass_3d.ufl + BASENAME sumfact_mass_sliced + INIFILE sliced.mini + ) diff --git a/test/sumfact/mass/mass_3d.mini b/test/sumfact/mass/mass_3d.mini index 2948674e..1244acec 100644 --- a/test/sumfact/mass/mass_3d.mini +++ b/test/sumfact/mass/mass_3d.mini @@ -17,3 +17,6 @@ extension = vtu numerical_jacobian = 1, 0 | expand num vectorize_quad = 1, 0 | expand vec sumfact = 1 + +[formcompiler.ufl_variants] +degree = 1 diff --git a/test/sumfact/mass/mass_3d.ufl b/test/sumfact/mass/mass_3d.ufl index bd56d296..5f55103e 100644 --- a/test/sumfact/mass/mass_3d.ufl +++ b/test/sumfact/mass/mass_3d.ufl @@ -1,6 +1,6 @@ cell = "hexahedron" -V = FiniteElement("DG", cell, 1) +V = FiniteElement("DG", cell, degree) u = TrialFunction(V) v = TestFunction(V) -- GitLab