From eb3bf7596fbf209f6a84670ac072935398ec2021 Mon Sep 17 00:00:00 2001 From: Yohei Tamura Date: Fri, 5 Feb 2021 13:26:01 +0900 Subject: [PATCH] add remove_overlap_spans --- python/src/lib.rs | 21 +++++++++++++++++++-- python/tests/test_main.py | 6 ++++++ python/textspan/__init__.py | 2 ++ python/textspan/__init__.pyi | 1 + 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/python/src/lib.rs b/python/src/lib.rs index 17a8bc0..9a140b9 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -83,8 +83,7 @@ fn textspan(_py: Python, m: &PyModule) -> PyResult<()> { /// Examples: /// >>> import textspan /// >>> spans = [(0, 2), (0, 3), (2, 4), (5, 7)] - /// >>> textspan.remove_span_overlaps(spans) - /// [(0, 3), (5, 7)] + /// >>> assert textspan.remove_span_overlaps(spans) == [(0, 3), (5, 7)] /// /// #[pyfn(m, "remove_span_overlaps")] @@ -93,6 +92,24 @@ fn textspan(_py: Python, m: &PyModule) -> PyResult<()> { Ok(textspan::remove_span_overlaps(&spans)) } + /// Removes overlapping spans from the given `spans` and returns the indices of the remaining spans. + /// + /// The first and longest spans are kept - if two spans overlap, the + /// first span is kept. If two spans overlap and their start + /// positions are the same, the longer span is kept. 
+ /// + /// Examples: + /// >>> import textspan + /// >>> spans = [(0, 2), (0, 3), (2, 4), (5, 7)] + /// >>> assert textspan.remove_span_overlaps_idx(spans) == [1, 3] + /// + /// + #[pyfn(m, "remove_span_overlaps_idx")] + #[text_signature = "(spans)"] + pub fn remove_span_overlaps_idx(_py: Python, spans: Vec) -> PyResult> { + Ok(textspan::remove_span_overlaps_idx(&spans)) + } + fn to_tuple(x: Result) -> (T, bool) { match x { Ok(x) => (x, true), diff --git a/python/tests/test_main.py b/python/tests/test_main.py index 00c9b5b..fb7ccca 100644 --- a/python/tests/test_main.py +++ b/python/tests/test_main.py @@ -51,3 +51,9 @@ def test_random_get_original_spans2(tokens, text, expected): def test_lift_span_index(span, spans, expected): assert textspan.lift_span_index(span, spans) == expected assert textspan.lift_spans_index([span], spans) == [expected] + + +def test_remove_span_overlaps(): + spans = [(0, 2), (0, 3), (2, 4), (5, 7)] + assert textspan.remove_span_overlaps(spans) == [(0, 3), (5, 7)] + assert textspan.remove_span_overlaps_idx(spans) == [1, 3] diff --git a/python/textspan/__init__.py b/python/textspan/__init__.py index f9888fe..3ee2545 100644 --- a/python/textspan/__init__.py +++ b/python/textspan/__init__.py @@ -3,6 +3,7 @@ align_spans_by_mapping, align_spans, remove_span_overlaps, + remove_span_overlaps_idx, lift_spans_index, lift_span_index, ) @@ -12,6 +13,7 @@ "align_spans_by_mapping", "align_spans", "remove_span_overlaps", + "remove_span_overlaps_idx", "lift_span_index", "lift_spans_index", ] diff --git a/python/textspan/__init__.pyi b/python/textspan/__init__.pyi index 883e17b..378ac95 100644 --- a/python/textspan/__init__.pyi +++ b/python/textspan/__init__.pyi @@ -10,5 +10,6 @@ def get_original_spans( tokens: List[str], original_text: str, ) -> List[List[Tuple[int, int]]]: ... def remove_span_overlaps(tokens: List[Tuple[int, int]]) -> List[Tuple[int, int]]: ... +def remove_span_overlaps_idx(tokens: List[Tuple[int, int]]) -> List[int]: ... 
def lift_span_index(span: Tuple[int, int], target_spans: List[Tuple[int, int]]) -> Tuple[Tuple[int, bool], Tuple[int, bool]]: ... def lift_spans_index(spans: List[Tuple[int, int]], target_spans: List[Tuple[int, int]]) -> List[Tuple[Tuple[int, bool], Tuple[int, bool]]]: ...