Skip to content

Commit 93a0a84

Browse files
committed Sep 23, 2024
add requirements and update table for memory footprint
1 parent 53874c4 commit 93a0a84

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed
 

‎LLM_size_pef_calculator.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ def calc_memory_footprint(model_spec, n_concurrent_request, avg_context_window):
5959
print(f"\n******************** Estimate LLM Memory Footprint ********************")
6060
memory_footprint_table = []
6161
for model_spec in model_specs:
62+
kv_cache_size_per_token = calc_kv_cache_size_per_token(model_spec["n_layers"], model_spec["d_model"])
6263
memory_footprint = calc_memory_footprint(model_spec, n_concurrent_request, avg_context_window)
63-
memory_footprint_table.append([model_spec['name'], f"{memory_footprint:.2f} GB"])
64-
print(tabulate(memory_footprint_table, headers=['Model', 'Memory Footprint'], tablefmt='orgtbl'))
64+
memory_footprint_table.append([model_spec['name'], f"{kv_cache_size_per_token:.6f} GiB/token", f"{memory_footprint:.2f} GB"])
65+
print(tabulate(memory_footprint_table, headers=['Model', 'KV Cache Size per Token', 'Memory Footprint'], tablefmt='orgtbl'))
6566

6667
def calc_kv_cache_tokens(num_gpu, gpu_memory_gb, model_params_billion, kv_cache_size):
6768
result = (num_gpu * gpu_memory_gb - 2 * model_params_billion) / kv_cache_size

‎requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tabulate

0 commit comments

Comments (0)
Please sign in to comment.