add BAAI/bge-small-en-v1.5 Optimization #1634
Draft
Azure Pipelines / Olive CI
failed
Feb 21, 2025 in 27m 27s
Build #20250220.7 had test failures
Details
- Failed: 1 (0.04%)
- Passed: 2,449 (94.89%)
- Other: 131 (5.08%)
- Total: 2,581
- 12,256 of 18,765 lines covered (65.31%)
Annotations
Check failure on line 9543 in Build log
azure-pipelines / Olive CI
Build log #L9543
Bash exited with code '1'.
Check failure on line 19 in Build log
azure-pipelines / Olive CI
Build log #L19
There are one or more test failures detected in result files. Detailed summary of published test results can be viewed in the Tests tab.
azure-pipelines / Olive CI
test_mnb_to_qdq[asymmetric-CPUExecutionProvider-nodes_to_exclude1-True-False-False]
Failed: DID NOT RAISE <class 'AssertionError'>
Raw output
create_mnb_model = (PosixPath('/tmp/pytest-of-root/pytest-0/test_mnb_to_qdq_asymmetric_CPU11/mnb.onnx'), 33, False)
execution_provider = 'CPUExecutionProvider'
nodes_to_exclude = ['/f1/MatMul_Q4'], add_zero_point = True, use_int4 = False
use_transpose_op = False
tmp_path = PosixPath('/tmp/pytest-of-root/pytest-0/test_mnb_to_qdq_asymmetric_CPU11')
@pytest.mark.skipif(
version.parse(onnxruntime.__version__) < version.parse("1.20"),
reason="Int4 DQ is only supported in ORT >= 1.20",
)
@pytest.mark.parametrize("use_transpose_op", [True, False])
@pytest.mark.parametrize("use_int4", [True, False])
@pytest.mark.parametrize("add_zero_point", [True, False])
@pytest.mark.parametrize("nodes_to_exclude", [None, ["/f1/MatMul_Q4"]])
@pytest.mark.parametrize("execution_provider", ["CPUExecutionProvider"])
def test_mnb_to_qdq(
create_mnb_model, execution_provider, nodes_to_exclude, add_zero_point, use_int4, use_transpose_op, tmp_path
):
available_providers = onnxruntime.get_available_providers()
if execution_provider not in available_providers:
pytest.skip(f"{execution_provider} is not available on this system {available_providers}")
mnb_path, in_dim, is_symmetric = create_mnb_model
input_model = ONNXModelHandler(mnb_path)
# setup
p = create_pass_from_dict(
MatMulNBitsToQDQ,
{
"use_transpose_op": use_transpose_op,
"use_int4": use_int4,
"add_zero_point": add_zero_point,
"nodes_to_exclude": nodes_to_exclude,
},
disable_search=True,
)
output_folder = tmp_path / "qdq-model"
# execute
qdq_model: ONNXModelHandler = p.run(input_model, output_folder)
# count ops
num_matmuls = 0
num_mnbs = 0
dag = OnnxDAG.from_model_path(qdq_model.model_path)
for name in dag.get_node_names():
op_type = dag.get_node_op_type(name)
if op_type == "MatMul":
num_matmuls += 1
elif op_type == "MatMulNBits":
num_mnbs += 1
assert num_matmuls == 3 - len(nodes_to_exclude or [])
assert num_mnbs == len(nodes_to_exclude or [])
# validate
original_session = onnxruntime.InferenceSession(str(mnb_path), providers=[execution_provider])
original_session.disable_fallback()
if is_symmetric and use_int4 and not add_zero_point and use_transpose_op:
# there seems to be a bug in ORT graph optimization which changes the int4 DQ to uint8 DQ
with pytest.raises(Exception, match="uint8"):
onnxruntime.InferenceSession(str(qdq_model.model_path), providers=[execution_provider])
return
else:
qdq_session = onnxruntime.InferenceSession(str(qdq_model.model_path), providers=[execution_provider])
qdq_session.disable_fallback()
input_data = {"input": np.random.randn(1, 1, in_dim).astype(np.float32)}
original_output = original_session.run(None, input_data)[0]
qdq_output = qdq_session.run(None, input_data)[0]
assert original_output.shape == qdq_output.shape
assert original_output.dtype == qdq_output.dtype
if execution_provider == "CPUExecutionProvider" and not use_transpose_op:
# Pre transposed DQ model does not match the expected output on x64 CPU
# check for assertion failure so we know when the test is fixed
> with pytest.raises(AssertionError):
E Failed: DID NOT RAISE <class 'AssertionError'>
test/unit_test/passes/onnx/test_mnb_to_qdq.py:129: Failed
Loading