# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Optional benchmarks-do-not-regress test"""

import contextlib
import json
import logging
import platform
import re
import shutil
from pathlib import Path

import pytest

from framework import utils
from framework.ab_test import git_ab_test
from host_tools.cargo_build import cargo

LOGGER = logging.getLogger(__name__)


def get_executables():
    """
    Get a list of binaries for benchmarking
    """

    # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
    # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
    # extract the path to the compiled benchmark binary.
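    # For illustration only, such a message line looks roughly like (fields abridged):
    #   {"reason": "compiler-artifact", "target": {...}, "executable": "<path to bench binary>", ...}
    # Lines without a non-null "executable" field are skipped below.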
    _, stdout, _ = cargo(
        "bench",
        f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
    )

    executables = []
    for line in stdout.split("\n"):
        if line:
            msg = json.loads(line)
            executable = msg.get("executable")
            if executable:
                executables.append(executable)

    return executables
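

# Note: because get_executables() is used in the parametrize decorator below, it runs at pytest
# collection time, so the benchmark binaries are built before any test in this module executes.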


@pytest.mark.no_block_pr
@pytest.mark.timeout(600)
@pytest.mark.parametrize("executable", get_executables())
def test_no_regression_relative_to_target_branch(executable):
    """
    Run the microbenchmarks in this repository, comparing results from the
    pull request's target branch against what is achieved on HEAD
    """
    run_criterion = get_run_criterion(executable)
    compare_results = get_compare_results(executable)
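    # Presumably, git_ab_test runs `run_criterion` on a checkout of the PR's target branch ("A")
    # and on HEAD ("B"), then hands both returned criterion directories to `compare_results`.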
    git_ab_test(run_criterion, compare_results)


def get_run_criterion(executable):
    """
    Get a function that executes the specified benchmarks, pinning them to a fixed CPU
    """

    def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
        baseline_name = "a_baseline" if is_a else "b_baseline"
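
        # `--save-baseline` is criterion's way of recording a named set of measurements on disk,
        # so that a later run can be compared against it ("a_baseline" vs "b_baseline" here).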
        with contextlib.chdir(firecracker_checkout):
            utils.check_output(
                f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
            )

        return firecracker_checkout / "build" / "cargo_target" / "criterion"

    return _run_criterion


def get_compare_results(executable):
    """
    Get a function that compares the two recorded criterion baselines for regressions,
    assuming that "A" is the baseline from main
    """

    def _compare_results(location_a_baselines: Path, location_b_baselines: Path):
        list_result = utils.check_output(
            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --list"
        )

        # Reformat a string like `page_fault #2: benchmark` into `page_fault_2`, because criterion
        # creates a directory with that name under `cargo_target/criterion/`.
        bench_marks = [
            re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", i.split(":")[0])
            for i in list_result.stdout.split("\n")
            if i.endswith(": benchmark")
        ]
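
        # Criterion stores the statistics for each benchmark under
        # <criterion dir>/<benchmark>/<baseline>/estimates.json; the "mean"/"point_estimate" read
        # below is in nanoseconds (hence the `average_ns` name).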
        for benchmark in bench_marks:
            data = json.loads(
                (
                    location_b_baselines / benchmark / "b_baseline" / "estimates.json"
                ).read_text("utf-8")
            )
            average_ns = data["mean"]["point_estimate"]

            LOGGER.info("%s mean: %iµs", benchmark, average_ns / 1000)

        # Assumption: location_b_baselines is under the cargo_target of the current working
        # directory, so just copy the a_baselines there to do the comparison.
        for benchmark in bench_marks:
            shutil.copytree(
                location_a_baselines / benchmark / "a_baseline",
                location_b_baselines / benchmark / "a_baseline",
            )
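
        # `--load-baseline b_baseline` tells criterion to load the saved "B" measurements instead of
        # re-running the benchmarks, and `--baseline a_baseline` names the baseline to compare them
        # against; any regression is reported as "Performance has regressed.".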
        bench_result = utils.check_output(
            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --baseline a_baseline --load-baseline b_baseline",
            True,
            Path.cwd().parent,
        )

        regressions_only = "\n\n".join(
            result
            for result in bench_result.stdout.split("\n\n")
            if "Performance has regressed." in result
        )

        # If this string is anywhere in stdout, then at least one of our benchmarks
        # is now performing worse with the PR changes.
        assert not regressions_only, "\n" + regressions_only

    return _compare_results