File tree: 1 file changed (+2 −2 lines changed)

test/3x/torch/quantization/fp8_quant (+2 −2 lines changed)

@@ -16,7 +16,7 @@ def test_two_step_layer_wise():
     # requires transformers >= 4.43.0, torch_dtype=config.torch_dtype
     # facebook/opt-350m parameters on disk is in torch.float16 dtype
     cpu_mem0 = get_used_cpu_mem_MB()
-    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype)
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype, use_safetensors=True)
     cpu_mem1 = get_used_cpu_mem_MB()
     assert (cpu_mem1 - cpu_mem0) < 100, "model with memory mapping should use no more than 100MiB."

@@ -33,7 +33,7 @@ def test_two_step_layer_wise():

     # fp16 llama2-7b is converted to bf16 during quantization layer-by-layer.
     cpu_mem0 = get_used_cpu_mem_MB()
-    new_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype)
+    new_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=config.torch_dtype, use_safetensors=True)
     cpu_mem2 = get_used_cpu_mem_MB()
     model = convert(new_model, qconfig)
     assert (cpu_mem2 - cpu_mem0) < 100, "model with memory mapping should use no more than 100MiB."
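For context, a minimal standalone sketch (not part of the PR) of the memory check these tests rely on: with transformers >= 4.43.0, loading a float16 safetensors checkpoint with a matching torch_dtype lets the weights be memory-mapped, so resident CPU memory barely grows, which is what the assertions above verify. The get_used_cpu_mem_MB helper below is an assumed psutil-based stand-in for the one used in the test file.

# Sketch only (assumed helper names, not taken from the PR):
import psutil
from transformers import AutoConfig, AutoModelForCausalLM

def get_used_cpu_mem_MB():
    # Resident set size of the current process, in MiB (stand-in for the
    # helper referenced by the test).
    return psutil.Process().memory_info().rss / (1024 * 1024)

model_name = "facebook/opt-350m"
config = AutoConfig.from_pretrained(model_name)  # on-disk weights are torch.float16

cpu_mem0 = get_used_cpu_mem_MB()
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=config.torch_dtype,  # keep the checkpoint's float16 dtype
    use_safetensors=True,            # require the memory-mappable safetensors format
)
cpu_mem1 = get_used_cpu_mem_MB()
print(f"CPU memory growth while loading: {cpu_mem1 - cpu_mem0:.1f} MiB")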