diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
index f0b92777dd2f8..4893ff45a74ff 100644
--- a/devops/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -13,6 +13,7 @@
 import os
 import csv
 import io
+import re
 
 
 def isUMFAvailable():
@@ -45,90 +46,36 @@ def benchmarks(self) -> list[Benchmark]:
         return benches
 
 
-class ComputeUMFBenchmark(Benchmark):
-    def __init__(self, bench, name):
+class GBench(Benchmark):
+    def __init__(self, bench):
         super().__init__(bench.directory, bench)
         self.bench = bench
-        self.bench_name = name
+        self.bench_name = "umf-benchmark"
         self.oneapi = get_oneapi()
+        self.umf_lib = options.umf + "lib"
 
-        self.col_name = None
-        self.col_iterations = None
-        self.col_real_time = None
-        self.col_cpu_time = None
-        self.col_time_unit = None
-
-        self.col_statistics_time = None
+        self.fragmentation_prefix = "FRAGMENTATION_"
 
-    def bin_args(self) -> list[str]:
-        return []
+        self.num_cols_with_memory = 13
 
-    def extra_env_vars(self) -> dict:
-        return {}
-
-    def setup(self):
-        if not isUMFAvailable():
-            print("UMF prefix path not provided")
-            return
-
-        self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name)
-
-    def get_tags(self):
-        return ["UMF", "allocation", "latency", "micro"]
-
-    def run(self, env_vars) -> list[Result]:
-        command = [
-            f"{self.benchmark_bin}",
-        ]
-
-        command += self.bin_args()
-        env_vars.update(self.extra_env_vars())
-
-        result = self.run_bench(
-            command, env_vars, add_sycl=False, ld_library=[self.oneapi.tbb_lib()]
-        )
-        parsed = self.parse_output(result)
-        results = []
-        for r in parsed:
-            (config, pool, mean) = r
-            label = f"{config} {pool}"
-            results.append(
-                Result(
-                    label=label,
-                    value=mean,
-                    command=command,
-                    env=env_vars,
-                    stdout=result,
-                    unit="ns",
-                    explicit_group=config,
-                )
-            )
-        return results
-
-    # Implementation with self.col_* indices could lead to the division by None
-    def get_mean(self, datarow):
-        raise NotImplementedError()
-
-    def teardown(self):
-        return
-
-
-class GBench(ComputeUMFBenchmark):
-    def __init__(self, bench):
-        super().__init__(bench, "umf-benchmark")
-
-        self.col_name = 0
-        self.col_iterations = 1
-        self.col_real_time = 2
-        self.col_cpu_time = 3
-        self.col_time_unit = 4
+        self.col_name = "name"
+        self.col_iterations = "iterations"
+        self.col_real_time = "real_time"
+        self.col_cpu_time = "cpu_time"
+        self.col_time_unit = "time_unit"
+        self.col_memory_overhead = "memory_overhead"
 
         self.idx_pool = 0
         self.idx_config = 1
         self.name_separator = "/"
 
         self.col_statistics_time = self.col_real_time
+        self.col_statistics_memory = self.col_memory_overhead
+
+        self.is_preloaded = False
+
+        self.lib_to_be_replaced = None
 
     def name(self):
         return self.bench_name
@@ -139,17 +86,23 @@ def name(self):
     def bin_args(self):
         return ["--benchmark_format=csv"]
 
-    # the default unit
-    # might be changed globally with --benchmark_time_unit={ns|us|ms|s}
-    # the change affects only benchmark where time unit has not been set
-    # explicitly
-    def unit(self):
-        return "ns"
-
     # these benchmarks are not stable, so set this at a large value
    def stddev_threshold(self) -> float:
         return 0.2  # 20%
 
+    def extra_env_vars(self) -> dict:
+        return {}
+
+    def setup(self):
+        if not isUMFAvailable():
+            print("UMF prefix path not provided")
+            return
+
+        self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name)
+
+    def is_memory_statistics_included(self, data_row):
+        return len(data_row) == self.num_cols_with_memory
+
     def get_pool_and_config(self, full_name):
         list_split = full_name.split(self.name_separator, 1)
         if len(list_split) != 2:
@@ -157,71 +110,115 @@ def get_pool_and_config(self, full_name):
 
         return list_split[self.idx_pool], list_split[self.idx_config]
 
-    def get_mean(self, datarow):
+    def get_mean_time(self, datarow):
         return float(datarow[self.col_statistics_time])
 
-    def parse_output(self, output):
-        csv_file = io.StringIO(output)
-        reader = csv.reader(csv_file)
+    def get_memory_overhead(self, datarow):
+        return float(datarow[self.col_statistics_memory])
 
-        data_row = next(reader, None)
-        if data_row is None:
-            raise ValueError("Benchmark output does not contain data.")
+    def get_unit_time_or_overhead(self, config):
+        if re.search(f"^{self.fragmentation_prefix}", config):
+            return "%"
 
-        results = []
-        for row in reader:
-            try:
-                full_name = row[self.col_name]
-                pool, config = self.get_pool_and_config(full_name)
-                mean = self.get_mean(row)
-                results.append((config, pool, mean))
-            except KeyError as e:
-                raise ValueError(f"Error parsing output: {e}")
+        # the default time unit
+        # might be changed globally with --benchmark_time_unit={ns|us|ms|s}
+        # the change affects only benchmark where time unit has not been set
+        # explicitly
+        return "ns"
 
-        return results
+    def get_names_of_benchmarks_to_be_run(self, command, env_vars):
+        list_all_command = command + ["--benchmark_list_tests"]
+        if self.is_preloaded:
+            list_all_command += ["--benchmark_filter=" + self.lib_to_be_replaced]
 
-class GBenchPreloaded(GBench):
-    def __init__(self, bench, lib_to_be_replaced, replacing_lib):
-        super().__init__(bench)
+        all_names = self.run_bench(
+            list_all_command, env_vars, add_sycl=False, ld_library=[self.umf_lib]
+        ).splitlines()
 
-        self.lib_to_be_replaced = lib_to_be_replaced
-        self.replacing_lib = replacing_lib
+        return all_names
 
-    def bin_args(self):
-        full_args = super().bin_args()
-        full_args.append(f"--benchmark_filter={self.lib_to_be_replaced}")
+    def run(self, env_vars) -> list[Result]:
+        command = [f"{self.benchmark_bin}"]
 
-        return full_args
+        all_names = self.get_names_of_benchmarks_to_be_run(command, env_vars)
 
-    def get_preloaded_name(self, pool_name) -> str:
-        new_pool_name = pool_name.replace(self.lib_to_be_replaced, self.replacing_lib)
+        command += self.bin_args()
+        env_vars.update(self.extra_env_vars())
 
-        return new_pool_name
+        results = []
+
+        for name in all_names:
+            specific_benchmark = command + ["--benchmark_filter=^" + name + "$"]
+
+            result = self.run_bench(
+                specific_benchmark, env_vars, add_sycl=False, ld_library=[self.umf_lib]
+            )
+
+            parsed = self.parse_output(result)
+            for r in parsed:
+                (explicit_group, pool, value) = r
+                label = f"{explicit_group} {pool}"
+                results.append(
+                    Result(
+                        label=label,
+                        value=value,
+                        command=command,
+                        env=env_vars,
+                        stdout=result,
+                        unit=self.get_unit_time_or_overhead(explicit_group),
+                        explicit_group=explicit_group,
+                    )
+                )
+
+        return results
 
     def parse_output(self, output):
         csv_file = io.StringIO(output)
-        reader = csv.reader(csv_file)
-
-        data_row = next(reader, None)
-        if data_row is None:
-            raise ValueError("Benchmark output does not contain data.")
+        reader = csv.DictReader(csv_file)
 
         results = []
+
         for row in reader:
             try:
                 full_name = row[self.col_name]
                 pool, config = self.get_pool_and_config(full_name)
-                mean = self.get_mean(row)
-                updated_pool = self.get_preloaded_name(pool)
-                updated_config = self.get_preloaded_name(config)
+                statistics_time = self.get_mean_time(row)
+
+                if self.is_preloaded:
+                    pool = self.get_preloaded_pool_name(pool)
+
+                results.append((config, pool, statistics_time))
+
+                if self.is_memory_statistics_included(row):
+                    statistics_overhead = self.get_memory_overhead(row)
+                    config = self.fragmentation_prefix + config
+
+                    results.append((config, pool, statistics_overhead))
 
-                results.append((updated_config, updated_pool, mean))
             except KeyError as e:
                 raise ValueError(f"Error parsing output: {e}")
 
         return results
 
+    def teardown(self):
+        return
+
+
+class GBenchPreloaded(GBench):
+    def __init__(self, bench, lib_to_be_replaced, replacing_lib):
+        super().__init__(bench)
+
+        self.is_preloaded = True
+
+        self.lib_to_be_replaced = lib_to_be_replaced
+        self.replacing_lib = replacing_lib
+
+    def get_preloaded_pool_name(self, pool_name) -> str:
+        new_pool_name = pool_name.replace(self.lib_to_be_replaced, self.replacing_lib)
+
+        return new_pool_name
+
 
 class GBenchGlibc(GBenchPreloaded):
     def __init__(self, bench, replacing_lib):