test_sampler.py
# SPDX-License-Identifier: Apache-2.0
import pytest

from vllm import LLM, envs
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams

if not envs.VLLM_USE_V1:
    pytest.skip(
        "Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.",
        allow_module_level=True,
    )


@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
@pytest.mark.skipif(not current_platform.is_tpu(),
                    reason="This test needs a TPU")
def test_sampler_different(model_name: str):
    """
    Test significantly different sampling params to assert the model produces
    different results.
    """
    llm = LLM(model_name,
              enforce_eager=False,
              max_num_seqs=1,
              max_model_len=512,
              max_num_batched_tokens=512)
    prompts = [
        "Write a short story about a robot that dreams for the first time."
    ]
    sampling_params = SamplingParams(temperature=0.9, min_p=0.2, max_tokens=64)
    output = llm.generate(prompts, sampling_params)

    sampling_params = SamplingParams(temperature=0.1, min_p=0.8, max_tokens=64)
    output2 = llm.generate(prompts, sampling_params)
    assert output[0].outputs[0].text != output2[0].outputs[0].text
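
    # Rationale (sketch of the min-p rule as commonly implemented): min-p
    # keeps only tokens whose probability is at least min_p * p_max, so
    # temperature=0.9 with min_p=0.2 samples broadly while temperature=0.1
    # with min_p=0.8 is near-greedy; the two completions should differ.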

    # Batch case with top-k: every request uses top_k=12 except the first,
    # which disables top-k (and min-p) to check the sampler handles mixed
    # per-request params. Build the list with a comprehension: `[...] * B`
    # would alias one SamplingParams object B times, so mutating entry 0
    # would silently change every request.
    for B in [4, 16]:
        p = prompts * B
        sampling_params = [
            SamplingParams(temperature=0.1, min_p=0.8, max_tokens=64,
                           top_k=12) for _ in range(B)
        ]
        # Disable top-k on the first prompt to check mixed batches work.
        sampling_params[0].top_k = -1
        sampling_params[0].min_p = 0
        output = llm.generate(p, sampling_params)
        assert output[0].outputs[0].text != output[-1].outputs[0].text
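

# A minimal, self-contained sketch (not part of the original test) of the
# min-p rule assumed above: a token survives filtering only if its
# probability is at least `min_p` times the largest probability. The logits
# are made up for illustration; run with `python test_sampler.py` (no vLLM
# or TPU needed). The test itself is meant for a TPU host, e.g.:
#   VLLM_USE_V1=1 pytest test_sampler.py
if __name__ == "__main__":
    import math

    logits = [2.0, 1.0, 0.5, -1.0]
    min_p = 0.8

    # Softmax over the toy logits.
    exps = [math.exp(x) for x in logits]
    probs = [e / sum(exps) for e in exps]

    # min-p cutoff: keep tokens with prob >= min_p * max(prob).
    cutoff = min_p * max(probs)
    kept = [i for i, prob in enumerate(probs) if prob >= cutoff]
    print(f"probs={[round(prob, 3) for prob in probs]} kept={kept}")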