Commit 7e3618b
[fix] fix transformers wrapper version mismatch.
happierpig committed Jul 2, 2024
1 parent f8b7bcc · commit 7e3618b
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 2 deletions.
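
The mismatch arises because recent transformers releases pass an extra `cache_position` keyword argument when calling each decoder layer's `forward`, so wrapper layers written against the older signature fail with a `TypeError`. Below is a minimal sketch of the pattern this commit applies, assuming a quantized Llama wrapper layer; the class name `QLlamaDecoderLayer` and the stub body are illustrative, not the repository's exact code:

```python
# Minimal sketch of the signature fix, assuming a wrapper layer like those in
# model/qLlamaLayer.py; the class name and stub body are illustrative.
from typing import Optional, Tuple

import torch
import torch.nn as nn


class QLlamaDecoderLayer(nn.Module):
    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        # Newer transformers versions pass cache_position into every decoder
        # layer; accepting it (even if unused here) avoids
        # "forward() got an unexpected keyword argument 'cache_position'".
        cache_position: Optional[torch.LongTensor] = None,
        padding_mask=None,
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        residual = hidden_states
        # ... quantized attention / MLP computation would go here ...
        return (hidden_states, None)
```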
README.md: 4 changes (2 additions & 2 deletions)
@@ -84,9 +84,9 @@ bash scripts/run_atom_ablation.sh /Path/To/Llama/Model
 ```
 
 
-You can also customize your own quantization setup by modifying the parameters. Check [model/llama.py](./model/llama.py) to see the description of each parameter.
+You can also customize your own quantization setup by modifying the parameters. Check [model/main.py](./model/main.py) to see the description of each parameter.
 ```
-python model/llama.py /Path/To/Llama/Model wikitext2 \
+python model/main.py /Path/To/Llama/Model wikitext2 \
 --wbits 4 --abits 4 --a_sym --w_sym \
 --act_group_size 128 --weight_group_size 128 --weight_channel_group 2 \
 --reorder --act_sort_metric hessian \
model/qLlamaLayer.py: 1 change (1 addition & 0 deletions)
@@ -92,6 +92,7 @@ def forward(
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
         padding_mask=None,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         residual = hidden_states
model/qMixtralLayer.py: 1 change (1 addition & 0 deletions)
@@ -385,6 +385,7 @@ def forward(
         output_attentions: Optional[bool] = False,
         output_router_logits: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
         **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         if "padding_mask" in kwargs:
model/qOPTLayer.py: 1 change (1 addition & 0 deletions)
@@ -242,6 +242,7 @@ def forward(
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         """
         Args:
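
As a quick sanity check after upgrading transformers, one can confirm that a patched wrapper accepts the new argument; this snippet assumes the illustrative `QLlamaDecoderLayer` sketched above:

```python
import inspect

# Confirm the patched forward signature accepts cache_position
# (QLlamaDecoderLayer is the illustrative class from the sketch above).
assert "cache_position" in inspect.signature(QLlamaDecoderLayer.forward).parameters
```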
