File tree: 1 file changed (+2 −2 lines changed)

test/3x/torch/quantization/fp8_quant (+2 −2 lines changed)

@@ -16,7 +16,7 @@ def test_two_step_layer_wise():
     # requires transformers >= 4.43.0, torch_dtype=config.torch_dtype
     # facebook/opt-350m parameters on disk is in torch.float16 dtype
     cpu_mem0 = get_used_cpu_mem_MB()
-    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype)
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype, use_safetensors=True)
     cpu_mem1 = get_used_cpu_mem_MB()
     assert (cpu_mem1 - cpu_mem0) < 100, "model with memory mapping should use no more than 100MiB."

@@ -33,7 +33,7 @@ def test_two_step_layer_wise():

     # fp16 llama2-7b is converted to bf16 during quantization layer-by-layer.
     cpu_mem0 = get_used_cpu_mem_MB()
-    new_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype)
+    new_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype, use_safetensors=True)
     cpu_mem2 = get_used_cpu_mem_MB()
     model = convert(new_model, qconfig)
     assert (cpu_mem2 - cpu_mem0) < 100, "model with memory mapping should use no more than 100MiB."
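For context, a minimal standalone sketch (not part of the PR) of the memory check these tests rely on: with transformers >= 4.43.0, loading a float16 safetensors checkpoint with a matching torch_dtype lets the weights be memory-mapped, so resident CPU memory barely grows, which is what the assertions above verify. The get_used_cpu_mem_MB helper below is an assumed psutil-based stand-in for the one used in the test file.

# Sketch only (assumed helper names, not taken from the PR):
import psutil
from transformers import AutoConfig, AutoModelForCausalLM

def get_used_cpu_mem_MB():
    # Resident set size of the current process, in MiB (stand-in for the
    # helper referenced by the test).
    return psutil.Process().memory_info().rss / (1024 * 1024)

model_name = "facebook/opt-350m"
config = AutoConfig.from_pretrained(model_name)  # on-disk weights are torch.float16

cpu_mem0 = get_used_cpu_mem_MB()
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=config.torch_dtype,  # keep the checkpoint's float16 dtype
    use_safetensors=True,            # require the memory-mappable safetensors format
)
cpu_mem1 = get_used_cpu_mem_MB()
print(f"CPU memory growth while loading: {cpu_mem1 - cpu_mem0:.1f} MiB")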