Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add BAAI/bge-small-en-v1.5 Optimization #1634

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from

linter

93ae4c4
Select commit
Loading
Failed to load commit list.
Draft

add BAAI/bge-small-en-v1.5 Optimization #1634

linter
93ae4c4
Select commit
Loading
Failed to load commit list.
Azure Pipelines / Olive CI failed Feb 21, 2025 in 27m 27s

Build #20250220.7 had test failures

Details

Tests

  • Failed: 1 (0.04%)
  • Passed: 2,449 (94.89%)
  • Other: 131 (5.08%)
  • Total: 2,581
Code coverage

  • 12256 of 18765 lines covered (65.31%)

Annotations

Check failure on line 9543 in Build log

See this annotation in the file changed.

@azure-pipelines azure-pipelines / Olive CI

Build log #L9543

Bash exited with code '1'.

Check failure on line 19 in Build log

See this annotation in the file changed.

@azure-pipelines azure-pipelines / Olive CI

Build log #L19

There are one or more test failures detected in result files. Detailed summary of published test results can be viewed in the Tests tab.

Check failure on line 1 in test_mnb_to_qdq[asymmetric-CPUExecutionProvider-nodes_to_exclude1-True-False-False]

See this annotation in the file changed.

@azure-pipelines azure-pipelines / Olive CI

test_mnb_to_qdq[asymmetric-CPUExecutionProvider-nodes_to_exclude1-True-False-False]

Failed: DID NOT RAISE <class 'AssertionError'>
Raw output
create_mnb_model = (PosixPath('/tmp/pytest-of-root/pytest-0/test_mnb_to_qdq_asymmetric_CPU11/mnb.onnx'), 33, False)
execution_provider = 'CPUExecutionProvider'
nodes_to_exclude = ['/f1/MatMul_Q4'], add_zero_point = True, use_int4 = False
use_transpose_op = False
tmp_path = PosixPath('/tmp/pytest-of-root/pytest-0/test_mnb_to_qdq_asymmetric_CPU11')

    @pytest.mark.skipif(
        version.parse(onnxruntime.__version__) < version.parse("1.20"),
        reason="Int4 DQ is only supported in ORT >= 1.20",
    )
    @pytest.mark.parametrize("use_transpose_op", [True, False])
    @pytest.mark.parametrize("use_int4", [True, False])
    @pytest.mark.parametrize("add_zero_point", [True, False])
    @pytest.mark.parametrize("nodes_to_exclude", [None, ["/f1/MatMul_Q4"]])
    @pytest.mark.parametrize("execution_provider", ["CPUExecutionProvider"])
    def test_mnb_to_qdq(
        create_mnb_model, execution_provider, nodes_to_exclude, add_zero_point, use_int4, use_transpose_op, tmp_path
    ):
        available_providers = onnxruntime.get_available_providers()
        if execution_provider not in available_providers:
            pytest.skip(f"{execution_provider} is not available on this system {available_providers}")
    
        mnb_path, in_dim, is_symmetric = create_mnb_model
        input_model = ONNXModelHandler(mnb_path)
    
        # setup
        p = create_pass_from_dict(
            MatMulNBitsToQDQ,
            {
                "use_transpose_op": use_transpose_op,
                "use_int4": use_int4,
                "add_zero_point": add_zero_point,
                "nodes_to_exclude": nodes_to_exclude,
            },
            disable_search=True,
        )
        output_folder = tmp_path / "qdq-model"
    
        # execute
        qdq_model: ONNXModelHandler = p.run(input_model, output_folder)
    
        # count ops
        num_matmuls = 0
        num_mnbs = 0
        dag = OnnxDAG.from_model_path(qdq_model.model_path)
        for name in dag.get_node_names():
            op_type = dag.get_node_op_type(name)
            if op_type == "MatMul":
                num_matmuls += 1
            elif op_type == "MatMulNBits":
                num_mnbs += 1
        assert num_matmuls == 3 - len(nodes_to_exclude or [])
        assert num_mnbs == len(nodes_to_exclude or [])
        # validate
        original_session = onnxruntime.InferenceSession(str(mnb_path), providers=[execution_provider])
        original_session.disable_fallback()
        if is_symmetric and use_int4 and not add_zero_point and use_transpose_op:
            # there seems to be a bug in ORT graph optimization which changes the int4 DQ to uint8 DQ
            with pytest.raises(Exception, match="uint8"):
                onnxruntime.InferenceSession(str(qdq_model.model_path), providers=[execution_provider])
            return
        else:
            qdq_session = onnxruntime.InferenceSession(str(qdq_model.model_path), providers=[execution_provider])
            qdq_session.disable_fallback()
    
        input_data = {"input": np.random.randn(1, 1, in_dim).astype(np.float32)}
        original_output = original_session.run(None, input_data)[0]
        qdq_output = qdq_session.run(None, input_data)[0]
        assert original_output.shape == qdq_output.shape
        assert original_output.dtype == qdq_output.dtype
        if execution_provider == "CPUExecutionProvider" and not use_transpose_op:
            # Pre transposed DQ model does not match the expected output on x64 CPU
            # check for assertion failure so we know when the test is fixed
>           with pytest.raises(AssertionError):
E           Failed: DID NOT RAISE <class 'AssertionError'>

test/unit_test/passes/onnx/test_mnb_to_qdq.py:129: Failed