
Commit 86868ba

Add benchmark script
1 parent 9c8c3a1 commit 86868ba

1 file changed, +203 −0 lines


tools/bench_klp.py (+203)
@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""Benchmark klp against generated logfmt and JSONL test files."""

import argparse
import datetime as dt
import json
import os
import random
import subprocess
import sys
import time
from statistics import mean, stdev
from typing import Tuple


def generate_logfmt_line() -> str:
    ts = dt.datetime.now(dt.timezone.utc).isoformat()
    levels = ["DEBUG", "INFO", "WARNING", "ERROR"]
    messages = [
        "User login successful",
        "Database connection established",
        "Processing request",
        "Cache miss",
        "Metrics collection started",
        "Background job completed",
        "Request validation failed",
        "Rate limit exceeded",
    ]
    return f'ts="{ts}" level="{random.choice(levels)}" msg="{random.choice(messages)}"'


def generate_test_file(filename: str, num_lines: int) -> None:
    with open(filename, "w") as f:
        for _ in range(num_lines):
            f.write(generate_logfmt_line() + "\n")


def generate_jsonl_line() -> str:
    data = {
        "timestamp": dt.datetime.now(dt.timezone.utc).isoformat(),
        "level": random.choice(["DEBUG", "INFO", "WARNING", "ERROR"]),
        "message": random.choice(
            [
                "User login successful",
                "Database connection established",
                "Processing request",
                "Cache miss",
                "Metrics collection started",
                "Background job completed",
                "Request validation failed",
                "Rate limit exceeded",
            ]
        ),
    }
    return json.dumps(data)


def generate_jsonl_file(filename: str, num_lines: int) -> None:
    with open(filename, "w") as f:
        for _ in range(num_lines):
            f.write(generate_jsonl_line() + "\n")


def verify_setup() -> None:
    """Verify that klp is available on PATH."""
    try:
        subprocess.run(["klp", "--version"], capture_output=True, check=True)
    except FileNotFoundError:
        print("Error: klp not found in PATH. Please install klp first.")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error running klp: {e}")
        sys.exit(1)


def run_benchmark(cmd: str, runs: int = 5) -> Tuple[float, float]:
    """Run a command multiple times and return mean and stdev of execution times."""
    times = []
    for _ in range(runs):
        start = time.perf_counter()
        result = subprocess.run(cmd, shell=True, capture_output=True)
        end = time.perf_counter()
        if result.returncode != 0:
            print(f"Error running command: {cmd}")
            print(f"stderr: {result.stderr.decode()}")
            print(f"stdout: {result.stdout.decode()}")
            return 0.0, 0.0
        times.append(end - start)
    # stdev needs at least two samples; report 0.0 for a single run
    return mean(times), stdev(times) if len(times) > 1 else 0.0


def format_time(seconds: float) -> str:
    """Format time in seconds to a human-readable string."""
    if seconds < 0.001:
        return f"{seconds * 1000000:.2f} µs"
    elif seconds < 1:
        return f"{seconds * 1000:.2f} ms"
    else:
        return f"{seconds:.2f} s"


def main() -> None:
    # Get script directory for relative paths
    script_dir = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(description="Run klp performance tests")
    parser.add_argument(
        "--lines", type=int, default=100000, help="Number of lines in test files"
    )
    parser.add_argument("--runs", type=int, default=5, help="Number of runs per test")
    parser.add_argument(
        "--debug", action="store_true", help="Show additional debug information"
    )
    args = parser.parse_args()

    verify_setup()

    # Base command prefix with common options
    base_cmd = "klp --no-color --errors print"

    test_data_dir = os.path.join(script_dir, "test_data")
    os.makedirs(test_data_dir, exist_ok=True)

    logfmt_path = os.path.join(test_data_dir, "test.logfmt")
    jsonl_path = os.path.join(test_data_dir, "test.jsonl")

    print(f"Generating test files with {args.lines} lines each...")
    generate_test_file(logfmt_path, args.lines)
    generate_jsonl_file(jsonl_path, args.lines)

    if args.debug:
        print("\nVerifying generated files:")
        print(f"logfmt file size: {os.path.getsize(logfmt_path)} bytes")
        print(f"jsonl file size: {os.path.getsize(jsonl_path)} bytes")
        print("\nFirst few lines of logfmt file:")
        with open(logfmt_path) as f:
            print("".join(f.readlines()[:3]))

    # Define test cases
    test_cases = [
        {
            "name": "Basic logfmt parsing",
            "cmd": f"{base_cmd} {logfmt_path} > /dev/null",
        },
        {
            "name": "JSONL parsing",
            "cmd": f"{base_cmd} -f jsonl {jsonl_path} > /dev/null",
        },
        {
            "name": "Filtered by log level",
            "cmd": f"{base_cmd} -l error,warning {logfmt_path} > /dev/null",
        },
        {
            "name": "Time-based filtering",
            "cmd": f"{base_cmd} --since 1h {logfmt_path} > /dev/null",
        },
        {
            "name": "Grep filtering",
            "cmd": f"{base_cmd} -g 'error' {logfmt_path} > /dev/null",
        },
        {
            "name": "Multiple grep patterns",
            "cmd": f"{base_cmd} -g 'error' -g 'warning' {logfmt_path} > /dev/null",
        },
        {
            "name": "With key selection",
            "cmd": f"{base_cmd} -k timestamp,level,message {logfmt_path} > /dev/null",
        },
        {
            "name": "Output as JSONL",
            "cmd": f"{base_cmd} -F jsonl {logfmt_path} > /dev/null",
        },
        {
            "name": "Output as CSV",
            "cmd": f"{base_cmd} -F csv -k timestamp,level,message {logfmt_path} > /dev/null",
        },
        {
            "name": "Parallel processing",
            "cmd": f"{base_cmd} --parallel 0 {logfmt_path} > /dev/null",
        },
    ]

    # Run benchmarks
    print(f"\nRunning benchmarks ({args.runs} runs per test)...\n")
    results = []
    for case in test_cases:
        print(f"Testing: {case['name']}")
        mean_time, std_dev = run_benchmark(case["cmd"], args.runs)
        if mean_time > 0:  # Only add successful tests
            results.append({"name": case["name"], "mean": mean_time, "stdev": std_dev})
            print(f"  Mean: {format_time(mean_time)} ± {format_time(std_dev)}\n")

    # Print summary
    if results:
        print("\nSummary (sorted by mean time):")
        print("-" * 80)
        for result in sorted(results, key=lambda x: x["mean"]):
            print(
                f"{result['name']:<30} {format_time(result['mean'])} ± {format_time(result['stdev'])}"
            )


if __name__ == "__main__":
    main()
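
Usage: a hypothetical invocation from the repository root (the --lines, --runs, and --debug options and their defaults come from the argparse setup above; generated test files are written to tools/test_data/):

    python3 tools/bench_klp.py --lines 100000 --runs 5 --debug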
