8
8
from configs import BootstrapConfig , WorkloadConfig , Usecase
9
9
from configs import VLLMConfig , VLLMOptionalConfig , LMCacheConfig , EngineType
10
10
from utils import run_command , get_max_context_length
11
+ import yaml
11
12
12
13
##### Helper functions #####
13
14
def CreateSingleLocalBootstrapConfig (
@@ -133,9 +134,9 @@ def test_chunk_prefill(model = "mistralai/Mistral-7B-Instruct-v0.2") -> pd.DataF
133
134
ModelConfig (model , config1 )
134
135
ModelConfig (model , config2 )
135
136
136
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
137
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
137
138
lengths = [8192 , 16384 , 24576 ]
138
- experiments = [CreateDummyExperiment (5 , length ) for length in lengths ]
139
+ experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
139
140
140
141
test_case = TestCase (
141
142
experiments = experiments ,
@@ -206,7 +207,7 @@ def test_lmcache_local_gpu(model = "mistralai/Mistral-7B-Instruct-v0.2") -> pd.D
206
207
207
208
# Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
208
209
lengths = [8192 , 16384 , 24576 ]
209
- experiments = [CreateDummyExperiment (5 , length ) for length in lengths ]
210
+ experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
210
211
211
212
test_case = TestCase (
212
213
experiments = experiments ,
@@ -245,14 +246,15 @@ def test_lmcache_local_disk(model = "mistralai/Mistral-7B-Instruct-v0.2") -> pd.
245
246
This function tests the local disk storage backend by comparing scenarios with and without lmcache.
246
247
"""
247
248
# Start two servers: with lmcache and without lmcache
248
- config1 = CreateSingleLocalBootstrapConfig (8000 , 0 , model , "configs/lmcache_local_disk.yaml" )
249
+ yaml_config = "configs/lmcache_local_disk.yaml"
250
+ config1 = CreateSingleLocalBootstrapConfig (8000 , 0 , model , yaml_config )
249
251
config2 = CreateSingleLocalBootstrapConfig (8001 , 1 , model , None )
250
252
251
253
# Set vllm configuration for different models
252
254
ModelConfig (model , config1 )
253
255
ModelConfig (model , config2 )
254
256
255
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
257
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
256
258
lengths = [8192 , 16384 , 24576 ]
257
259
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
258
260
@@ -262,6 +264,13 @@ def test_lmcache_local_disk(model = "mistralai/Mistral-7B-Instruct-v0.2") -> pd.
262
264
263
265
# Run test case
264
266
final_result = run_test_case (test_case )
267
+
268
+ # Clean up
269
+ with open (yaml_config , 'r' ) as file :
270
+ data = yaml .safe_load (file )
271
+ local_device = data .get ('local_device' ) + "*"
272
+ os .system (f"rm -rf { local_device } " )
273
+
265
274
return final_result
266
275
267
276
def test_lmcache_local_distributed (model = "mistralai/Mistral-7B-Instruct-v0.2" ) -> pd .DataFrame :
@@ -277,7 +286,7 @@ def test_lmcache_local_distributed(model = "mistralai/Mistral-7B-Instruct-v0.2")
277
286
# Set vllm configuration for different models
278
287
ModelConfig (model , config )
279
288
280
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
289
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
281
290
lengths = [8192 , 16384 , 24576 ]
282
291
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
283
292
@@ -302,7 +311,7 @@ def test_lmcache_remote_cachegen(model = "mistralai/Mistral-7B-Instruct-v0.2") -
302
311
ModelConfig (model , config1 )
303
312
ModelConfig (model , config2 )
304
313
305
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
314
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
306
315
lengths = [8192 , 16384 , 24576 ]
307
316
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
308
317
@@ -354,7 +363,7 @@ def test_lmcache_remote_safetensor(model = "mistralai/Mistral-7B-Instruct-v0.2")
354
363
ModelConfig (model , config1 )
355
364
ModelConfig (model , config2 )
356
365
357
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
366
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
358
367
lengths = [8192 , 16384 , 24576 ]
359
368
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
360
369
@@ -379,7 +388,7 @@ def test_lmcache_safetensor_distributed(model = "mistralai/Mistral-7B-Instruct-v
379
388
# Set vllm configuration for different models
380
389
ModelConfig (model , config )
381
390
382
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
391
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
383
392
lengths = [8192 , 16384 , 24576 ]
384
393
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
385
394
@@ -399,13 +408,13 @@ def test_lmcache_remote_disk(model = "mistralai/Mistral-7B-Instruct-v0.2") -> pd
399
408
config1 = CreateSingleLocalBootstrapConfig (8000 , 0 , model , "configs/lmcache_remote_cachegen.yaml" )
400
409
config2 = CreateSingleLocalBootstrapConfig (8001 , 1 , model , None )
401
410
402
- config1 .lmcache_config .remote_device = "/local/end-to-end-tests/lmcache-server"
411
+ config1 .lmcache_config .remote_device = "/local/end-to-end-tests/lmcache-server/ "
403
412
404
413
# Set vllm configuration for different models
405
414
ModelConfig (model , config1 )
406
415
ModelConfig (model , config2 )
407
416
408
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
417
+ # Experiments: 8K, 16K, 24K shared context; each experiment has 10 queries
409
418
lengths = [8192 , 16384 , 24576 ]
410
419
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
411
420
@@ -415,22 +424,30 @@ def test_lmcache_remote_disk(model = "mistralai/Mistral-7B-Instruct-v0.2") -> pd
415
424
416
425
# Run test case
417
426
final_result = run_test_case (test_case )
427
+
428
+ # Clean up
429
+ os .system (f"rm -rf { config1 .lmcache_config .remote_device } *" )
430
+
418
431
return final_result
419
432
420
433
def test_lmcache_redis_sentinel (model = "mistralai/Mistral-7B-Instruct-v0.2" ) -> pd .DataFrame :
421
- config1 = CreateSingleLocalBootstrapConfig (8000 , 1 , model , "configs/lmcache_redis_sentinel_cachegen.yaml" )
434
+ # Set up the master node
435
+ os .environ ["REDIS_SERVICE_NAME" ] = "redismaster"
436
+
437
+ config1 = CreateSingleLocalBootstrapConfig (8000 , 0 , model , "configs/lmcache_redis_sentinel_cachegen.yaml" )
438
+ config2 = CreateSingleLocalBootstrapConfig (8001 , 1 , model , None )
422
439
423
440
# Set vllm configuration for different models
424
441
ModelConfig (model , config1 )
442
+ ModelConfig (model , config2 )
425
443
426
- # Experiments: 8K, 16K, 24K shared context, each experiments has 5 queries
427
- #lengths = [8192, 16384, 24576]
428
- lengths = [24576 ]
444
+ # Experiments: 10375 shared context; each experiment has 10 queries
445
+ lengths = [10375 ]
429
446
experiments = [CreateDummyExperiment (10 , length ) for length in lengths ]
430
447
431
448
test_case = TestCase (
432
449
experiments = experiments ,
433
- engines = [config1 ])
450
+ engines = [config1 , config2 ])
434
451
435
452
# Run test case
436
453
final_result = run_test_case (test_case )
0 commit comments