[bugfix] fix inductor cache on max_position_embeddings (#15436)

youkaichao · web-flow · commit d0cfec7ab919 · 2025-03-25T07:05:39.000-07:00
Signed-off-by: youkaichao <youkaichao@gmail.com>
diff --git a/vllm/config.py b/vllm/config.py
@@ -221,6 +221,9 @@ def compute_hash(self) -> str:
         factors.append(self.trust_remote_code)
         factors.append(self.rope_scaling)
         factors.append(self.rope_theta)
+        # rope cos/sin cache depends on the max_position_embeddings
+        factors.append(
+            getattr(self.hf_config, "max_position_embeddings", "None"))
         return hashlib.sha256(str(factors).encode()).hexdigest()
 
     def __init__(