
Commit 3029b28

lapp0 authored and brandonwillard committed

ASV PR bench workflow, pytest-bench -> ASV, add peakmem tests

1 parent: 95f108e

10 files changed: +191 −78 lines
+52
Benchmark PR workflow (new file, +52):

```yaml
name: Benchmark PR

on:
  pull_request:
    branches: [main]
  workflow_dispatch:
env:
  PYTHON_VERSION: "3.10"
  WORKING_DIR: ${{ github.workspace }}/benchmarks
  BENCHMARKS_OUTPUT: ${{ github.workspace }}/benchmarks_output

jobs:
  benchmark-pr:
    runs-on: ubuntu-latest
    if: contains(github.event.pull_request.labels.*.name, 'run_benchmarks') || github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_run'

    defaults:
      run:
        working-directory: ${{ env.WORKING_DIR }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install asv virtualenv lf-asv-formatter

      - name: Create ASV machine config file
        run: asv machine --machine gh-runner --yes

      - name: Run Benchmarks - `PR HEAD` vs `main`
        run: |
          # prepare main branch for comparison
          git remote add upstream https://github.com/${{ github.repository }}.git
          git fetch upstream main

          # Run benchmarks; allow errors, they will be caught in the next step
          asv continuous upstream/main HEAD \
            --no-stats --interleave-rounds -a repeat=3 || true

      - name: BENCHMARK RESULTS
        run: asv compare --factor=1.1 --no-stats --split upstream/main HEAD
```

.gitignore (+1)

```diff
@@ -6,3 +6,4 @@ docs/build
 .idea/
 *.gguf
 .venv
+benchmarks/results
```

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/asv.conf.json (new file, +20)

```json
{
    "version": 1,
    "project": "Outlines",
    "project_url": "https://outlines-dev.github.io/outlines/",
    "repo": "..",
    "branches": [
        "HEAD"
    ],
    "build_command": [
        "python -mpip install .[test]",
        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}",
    ],
    "environment_type": "virtualenv",
    "show_commit_url": "https://github.com/lapp0/outlines/commit/",
    "benchmark_dir": ".",
    "env_dir": "env",
    "results_dir": "results",
    "html_dir": "html",
    "build_cache_size": 8
}
```

tests/benchmark/test_benchmark_json_schema.py → benchmarks/bench_json_schema.py (+19 −24)

```diff
@@ -1,12 +1,16 @@
-import pytest
-
 import outlines

 outlines.disable_cache()

 from outlines.fsm.guide import RegexGuide  # noqa: E402
 from outlines.fsm.json_schema import build_regex_from_schema  # noqa: E402

+from .common import (  # noqa: E402
+    clear_outlines_cache,
+    ensure_numba_compiled,
+    setup_tokenizer,
+)
+
 simple_schema = """{
     "$defs": {
         "Armor": {
@@ -63,30 +67,21 @@
     "required": ["id", "work", "recording_artists"]
 }"""

-
 schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)


-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
-    """Benchmark convert json schema to regex"""
-    schema = schemas[schema_name]
-    benchmark.pedantic(
-        build_regex_from_schema,
-        args=(schema,),
-        rounds=8,
-    )
+class JsonSchemaBenchmark:
+    params = schemas.keys()
+
+    def setup(self, schema_name):
+        clear_outlines_cache()
+        self.tokenizer = setup_tokenizer()
+        self.schema = schemas[schema_name]
+        ensure_numba_compiled(self.tokenizer)

+    def time_json_schema_to_regex(self, schema_name):
+        build_regex_from_schema(self.schema)

-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, schema_name
-):
-    """Benchmark compile json schema as FSM"""
-    schema = schemas[schema_name]
-    regex = build_regex_from_schema(schema)
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex, tokenizer),
-        rounds=8,
-    )
+    def time_json_schema_to_fsm(self, schema_name):
+        regex = build_regex_from_schema(self.schema)
+        RegexGuide(regex, self.tokenizer)
```
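For context on the class-based style above: asv discovers benchmarks by naming convention, so the pytest fixtures and `@pytest.mark.parametrize` decorators are no longer needed. A minimal sketch of those conventions, per the asv documentation; the class and method names here are hypothetical, for illustration only:

```python
# Minimal sketch of asv's conventions (https://asv.readthedocs.io):
# each value in `params` is passed to `setup` and to every benchmark
# method, and methods prefixed with `time_` are timed automatically.
class ExampleSortBenchmark:  # hypothetical example, not part of this commit
    params = ["small", "large"]

    def setup(self, size):
        # Runs before each measurement and is excluded from the timing.
        self.data = list(range(100 if size == "small" else 100_000))

    def time_sorted(self, size):
        sorted(self.data)
```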

benchmarks/bench_numba_compile.py (new file, +37)

```python
import importlib

import interegular
import numba

import outlines

from .common import clear_outlines_cache, setup_tokenizer

outlines.disable_cache()


class NumbaCompileBenchmark:
    def setup(self):
        clear_outlines_cache()
        from outlines.fsm import regex

        self.tokenizer = setup_tokenizer()
        self.regex = regex
        original_njit = numba.njit

        def mock_njit(*args, **kwargs):
            kwargs["cache"] = False
            return original_njit(*args, **kwargs)

        self.original_njit = original_njit
        numba.njit = mock_njit
        importlib.reload(self.regex)
        self.regex_pattern, _ = self.regex.make_deterministic_fsm(
            interegular.parse_pattern("a").to_fsm().reduce()
        )

    def teardown(self):
        numba.njit = self.original_njit

    def time_compile_numba(self):
        self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
```
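The setup above monkeypatches `numba.njit` to force `cache=False`, then reloads `outlines.fsm.regex` so its decorators re-run under the patch; that way the benchmark measures actual compilation rather than a cache hit, and `teardown` restores the original function. A stripped-down sketch of that patch-and-restore pattern, using `json.dumps` as a stand-in target (hypothetical example, not part of this commit):

```python
import json


class PatchRestoreExample:  # hypothetical example, not part of this commit
    def setup(self):
        # Save the original so teardown can undo the patch.
        self._original_dumps = json.dumps
        original = json.dumps

        def patched_dumps(*args, **kwargs):
            # Force a setting, analogous to cache=False above.
            kwargs.setdefault("sort_keys", True)
            return original(*args, **kwargs)

        json.dumps = patched_dumps

    def teardown(self):
        # Restore global state so later benchmarks are unaffected.
        json.dumps = self._original_dumps

    def time_dumps(self):
        json.dumps({"b": 1, "a": 2})
```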
Regex guide benchmarks (filename not shown in this view):

```diff
@@ -1,7 +1,7 @@
-import pytest
-
 import outlines

+from .common import clear_outlines_cache, ensure_numba_compiled, setup_tokenizer
+
 outlines.disable_cache()

 from outlines.fsm.guide import RegexGuide  # noqa: E402
@@ -19,14 +19,27 @@
 }


-@pytest.mark.parametrize("regex_name", regex_samples.keys())
-def test_benchmark_regex_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, regex_name
-):
-    """Benchmark converting regex to FSM"""
-    regex_str = regex_samples[regex_name]
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex_str, tokenizer),
-        rounds=8,
-    )
+class RegexGuideBenchmark:
+    params = regex_samples.keys()
+
+    def setup(self, pattern_name):
+        clear_outlines_cache()
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+        self.pattern = regex_samples[pattern_name]
+
+    def time_regex_to_guide(self, pattern_name):
+        RegexGuide(self.pattern, self.tokenizer)
+
+
+class MemoryRegexGuideBenchmark:
+    params = ["simple_phone", "complex_span_constrained_relation_extraction"]
+
+    def setup(self, pattern_name):
+        clear_outlines_cache()
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+        self.pattern = regex_samples[pattern_name]
+
+    def peakmem_regex_to_guide(self, pattern_name):
+        RegexGuide(self.pattern, self.tokenizer)
```
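`MemoryRegexGuideBenchmark` supplies the peakmem tests from the commit title: in asv, a method prefixed `peakmem_` reports the peak memory use of the process while the method body runs, rather than its wall-clock time. A minimal sketch (hypothetical example, not part of this commit):

```python
class ExamplePeakMemBenchmark:  # hypothetical example, not part of this commit
    def peakmem_build_buffers(self):
        # asv records the process's peak memory use while this runs.
        buffers = [bytearray(1024) for _ in range(10_000)]
        return buffers
```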
Shared benchmark helpers, imported above as `.common` (filename not shown in this view):

```diff
@@ -1,17 +1,19 @@
-import pytest
 from transformers import AutoTokenizer

+import outlines.caching
 from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer


-@pytest.fixture
-def tokenizer():
+def clear_outlines_cache():
+    outlines.caching.clear_cache()
+
+
+def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)


-@pytest.fixture
 def ensure_numba_compiled(tokenizer):
     RegexGuide("a", tokenizer)
     return True
```
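These helpers replace the old pytest fixtures, so asv benchmark classes call them directly. A hypothetical new module composed from them, following the same pattern as the modules above (module and class names invented for illustration):

```python
import outlines

outlines.disable_cache()

from outlines.fsm.guide import RegexGuide  # noqa: E402

from .common import (  # noqa: E402
    clear_outlines_cache,
    ensure_numba_compiled,
    setup_tokenizer,
)


class ExampleGuideBenchmark:  # hypothetical example, not part of this commit
    def setup(self):
        clear_outlines_cache()
        self.tokenizer = setup_tokenizer()
        ensure_numba_compiled(self.tokenizer)

    def time_four_digit_guide(self):
        # Build a guide for a simple four-digit pattern.
        RegexGuide("[0-9]{4}", self.tokenizer)
```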

docs/community/contribute.md (+30 −4)

````diff
@@ -57,12 +57,38 @@ And run the code style checks:
 pre-commit run --all-files
 ```

-When modifying the code related to the index compilation, we kindly ask you to
-post benchmarks before and after your changes. You can run benchmarks using:
+### Benchmarking

-```python
-pytest --benchmark-only
+Outlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. Benchmarks are run automatically before pull requests are merged to prevent performance degradation.
+
+You can run the benchmark test suite locally with the following command:
+```
+asv run --config benchmarks/asv.conf.json
+```
+
+Run a specific test:
+```
+asv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
+```
+
+Profile a specific test:
 ```
+asv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
+```
+
+Compare to `origin/main`:
+```
+git fetch origin
+asv continuous origin/main HEAD --config benchmarks/asv.conf.json
+```
+
+#### ASV PR Behavior
+
+- **View ASV Benchmark Results:** Open the workflow run and view the `BENCHMARK RESULTS` step.
+- Merging is blocked unless benchmarks are run for the latest commit.
+- Benchmarks fail if performance degrades by more than 10% for any individual benchmark.
+- The "Benchmark PR" workflow runs when it is manually dispatched, or on every commit once the `run_benchmarks` label is added to the PR.


 ### Contribute to the documentation
````
tests/benchmark/test_benchmark_numba_compile.py (−33)

This file was deleted.
