diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index bf4634463e63404437943bfb4aa1c5ba21c09e1d..2b51a85342c861d691a7397888499669544e0413 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -54,20 +54,18 @@ def attach_vectorization_info(sf): def costmodel(sf): - # Penalize vertical vectorization - vertical_penalty = 1 + math.log(sf.vertical_width) + # Penalize vertical vectorization and scalar execution + verticality = sf.vertical_width + if isinstance(sf, SumfactKernel): + verticality = get_vcl_type_size(dtype_floatingpoint()) + vertical_penalty = 1 + 0.5 * math.log(verticality, 2) memory_penalty = 1.0 if isinstance(sf, VectorizedSumfactKernel): - memory_penalty = 1.0 + math.log(len(set(k.interface for k in sf.kernels)), 2) - - # Penalize scalar sum factorization kernels - scalar_penalty = 1 - if isinstance(sf, SumfactKernel): - scalar_penalty = get_vcl_type_size(dtype_floatingpoint()) + memory_penalty = 1.0 + 0.25 * math.log(len(set(k.interface for k in sf.kernels)), 2) # Return total operations - return sf.operations * vertical_penalty * memory_penalty * scalar_penalty + return sf.operations * vertical_penalty * memory_penalty def explicit_costfunction(sf): @@ -265,21 +263,27 @@ def level1_optimal_vectorization_strategy(sumfacts, width): # Find the minimum cost strategy between all the quadrature point tuples optimal_strategies = {qp: level2_optimal_vectorization_strategy(sumfacts, width, qp) for qp in quad_points} - qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp]))) # If we are using the 'target' strategy, we might want to log some information. if get_form_option("vectorization_strategy") == "target": # Print the achieved cost and the target cost on the screen set_form_option("vectorization_strategy", "model") + qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp]))) cost = strategy_cost((qp, optimal_strategies[qp])) + print("The target cost was: {}".format(get_form_option("vectorization_target"))) print("The achieved cost was: {}".format(cost)) - print("The optimal cost would be: {}".format(strategy_cost(level1_optimal_vectorization_strategy(sumfacts, width)))) + optimum = level1_optimal_vectorization_strategy(sumfacts, width) + print("The optimal cost would be: {}".format(strategy_cost(optimum))) set_form_option("vectorization_strategy", "target") print("The score in 'target' logic was: {}".format(strategy_cost((qp, optimal_strategies[qp])))) # Print the employed vectorization strategy into a file - filename = "targetstrat_{}.log".format(int(float(get_form_option("vectorization_target")))) + suffix = "" + if get_global_context_value("integral_type") == "interior_facet": + suffix = "_dir{}_mod{}".format(get_global_context_value("facedir_s"), + get_global_context_value("facemod_s")) + filename = "targetstrat_{}{}.log".format(int(float(get_form_option("vectorization_target"))), suffix) with open(filename, 'w') as f: f.write("\n".join(stringify_vectorization_strategy((qp, optimal_strategies[qp])))) @@ -291,6 +295,8 @@ def level1_optimal_vectorization_strategy(sumfacts, width): # TODO: Depending on the number of samples, we might need a file lock here. with open("mapping.csv", 'a') as f: f.write(" ".join((identifier, str(cost), short_stringify_vectorization_strategy((qp, optimal_strategies[qp])))) + "\n") + else: + qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp]))) return qp, optimal_strategies[qp]