Add a vertical (sliced) vectorization strategy for sum factorization kernels,
selected through the vectorize_slice option; fix the slize_size typo in
BasisTabulationMatrix; add a sliced mass system test and read the polynomial
degree of mass_3d.ufl from the ini file.

NOTE(review): the leading whitespace of context and added lines was
reconstructed from a whitespace-collapsed copy of this patch. If a hunk is
rejected, apply with whitespace-insensitive context matching
(git apply --ignore-whitespace, or patch -l).
NOTE(review): vertical_vectorization_strategy gained a docstring and the
get_vcl_type_size call was moved out of the per-kernel loop, so the
post-image blob id on the vectorization.py index line no longer describes
the patched file.
NOTE(review): the new CMake test names sliced.mini, which no hunk in this
patch adds; confirm that the file exists.

diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py
index d6519699bd065667f779d81bed1abc450123fd18..71cadb752f9b004c8af184f7ca295001e6710437 100644
--- a/python/dune/perftool/sumfact/tabulation.py
+++ b/python/dune/perftool/sumfact/tabulation.py
@@ -56,7 +56,7 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord):
         if quadrature_size is None:
             quadrature_size = quadrature_points_per_direction()
         if slice_size is not None:
-            quadrature_size = ceildiv(quadrature_size, slize_size)
+            quadrature_size = ceildiv(quadrature_size, slice_size)
         if basis_size is None:
             basis_size = basis_functions_per_direction()
         ImmutableRecord.__init__(self,
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 460ae1e9fb77ffac0b26e7fb733b4ffa4e10adb8..4e709b391f807d05b06fe9fc65822a0767c0f734 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -41,6 +41,44 @@ def no_vectorization(sumfacts):
                                                input=get_counted_variable("input")))
 
 
+def vertical_vectorization_strategy(sumfact, depth):
+    """Split a stage 1 kernel into ``depth`` sliced copies.
+
+    The last matrix in ``sumfact.matrix_sequence`` whose quadrature_size is
+    divisible by ``depth`` is replaced, in each of ``depth`` copies of the
+    kernel, by a copy carrying ``slice_size=depth`` and ``slice_index=i``.
+    The copies are bundled into a VectorizedSumfactKernel. Kernels of any
+    other stage are passed on unchanged. In both cases the result of
+    _cache_vectorization_info is returned.
+    """
+    # For the sake of simplicity we restrict ourselves to stage 1 so far
+    if sumfact.stage == 1:
+        # Assert that this is not already sliced
+        assert all(mat.slice_size is None for mat in sumfact.matrix_sequence)
+
+        # Determine which of the matrices in the kernel should be sliced
+        sliced = None
+        for i, mat in enumerate(sumfact.matrix_sequence):
+            if mat.quadrature_size % depth == 0:
+                sliced = i
+
+        # Currently we assume that this function is always able to do the thing!
+        assert sliced is not None
+
+        kernels = []
+        oldtab = sumfact.matrix_sequence[sliced]
+        for i in range(depth):
+            seq = list(sumfact.matrix_sequence)
+            seq[sliced] = oldtab.copy(slice_size=depth,
+                                      slice_index=i)
+            kernels.append(sumfact.copy(matrix_sequence=tuple(seq)))
+
+        vsf = VectorizedSumfactKernel(kernels=tuple(kernels))
+        return _cache_vectorization_info(sumfact, vsf)
+    else:
+        return _cache_vectorization_info(sumfact, sumfact)
+
+
 def horizontal_vectorization_strategy(sumfacts):
     width = get_vcl_type_size(np.float64)
     # We currently heuristically allow horizontal vectorization if the number
@@ -95,12 +133,16 @@ def decide_vectorization_strategy():
     from dune.perftool.generation import retrieve_cache_items
     sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")]
 
-    if not get_option("vectorize_grads"):
-        no_vectorization(sumfacts)
-    else:
+    if get_option("vectorize_grads"):
         # Currently we base our idea here on the fact that we only group sum
         # factorization kernels with the same input.
         inputkeys = set(sf.input_key for sf in sumfacts)
         for inputkey in inputkeys:
             sumfact_filter = [sf for sf in sumfacts if sf.input_key == inputkey]
             horizontal_vectorization_strategy(sumfact_filter)
+    elif get_option("vectorize_slice"):
+        width = get_vcl_type_size(np.float64)
+        for sumfact in sumfacts:
+            vertical_vectorization_strategy(sumfact, width)
+    else:
+        no_vectorization(sumfacts)
diff --git a/test/sumfact/mass/CMakeLists.txt b/test/sumfact/mass/CMakeLists.txt
index a640f0204c5393bc10b5c9fa66f167586d952b65..a131398857faf411afcc2a0b49e7e925366b909a 100644
--- a/test/sumfact/mass/CMakeLists.txt
+++ b/test/sumfact/mass/CMakeLists.txt
@@ -8,3 +8,8 @@ dune_add_formcompiler_system_test(UFLFILE mass_3d.ufl
                                   BASENAME sumfact_mass_3d
                                   INIFILE mass_3d.mini
                                   )
+
+dune_add_formcompiler_system_test(UFLFILE mass_3d.ufl
+                                  BASENAME sumfact_mass_sliced
+                                  INIFILE sliced.mini
+                                  )
diff --git a/test/sumfact/mass/mass_3d.mini b/test/sumfact/mass/mass_3d.mini
index 2948674e0acc6b34dd4e92fec728f449e38978c6..1244acecc9c387519711ca883d6bc1341b268591 100644
--- a/test/sumfact/mass/mass_3d.mini
+++ b/test/sumfact/mass/mass_3d.mini
@@ -17,3 +17,6 @@ extension = vtu
 numerical_jacobian = 1, 0 | expand num
 vectorize_quad = 1, 0 | expand vec
 sumfact = 1
+
+[formcompiler.ufl_variants]
+degree = 1
diff --git a/test/sumfact/mass/mass_3d.ufl b/test/sumfact/mass/mass_3d.ufl
index bd56d29607424ad9c0a9b8f13e709fd509836237..5f55103e52f0b84c550e38f68c0acc8d77465793 100644
--- a/test/sumfact/mass/mass_3d.ufl
+++ b/test/sumfact/mass/mass_3d.ufl
@@ -1,6 +1,6 @@
 cell = "hexahedron"
 
-V = FiniteElement("DG", cell, 1)
+V = FiniteElement("DG", cell, degree)
 
 u = TrialFunction(V)
 v = TestFunction(V)