Skip to content

Commit

Permalink
add remove_overlap_spans
Browse files Browse the repository at this point in the history
  • Loading branch information
tamuhey committed Feb 5, 2021
1 parent a5eeb26 commit eb3bf75
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 2 deletions.
21 changes: 19 additions & 2 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ fn textspan(_py: Python, m: &PyModule) -> PyResult<()> {
/// Examples:
/// >>> import textspan
/// >>> spans = [(0, 2), (0, 3), (2, 4), (5, 7)]
/// >>> textspan.remove_span_overlaps(spans)
/// [(0, 3), (5, 7)]
/// >>> assert textspan.remove_span_overlaps(spans) == [(0, 3), (5, 7)]
///
///
#[pyfn(m, "remove_span_overlaps")]
Expand All @@ -93,6 +92,24 @@ fn textspan(_py: Python, m: &PyModule) -> PyResult<()> {
Ok(textspan::remove_span_overlaps(&spans))
}

/// Removes overlapping spans from the given `spans` and returns the indices of the spans that are kept.
///
/// When two spans overlap, the one that comes first is kept. When two
/// overlapping spans start at the same position, the longer one is kept.
/// The returned indices refer to positions in the input `spans` list.
///
/// Examples:
/// >>> import textspan
/// >>> spans = [(0, 2), (0, 3), (2, 4), (5, 7)]
/// >>> assert textspan.remove_span_overlaps_idx(spans) == [1, 3]
///
///
#[pyfn(m, "remove_span_overlaps_idx")]
#[text_signature = "(spans)"]
pub fn remove_span_overlaps_idx(_py: Python, spans: Vec<Span>) -> PyResult<Vec<usize>> {
// Thin binding: delegates to the core `textspan` crate and wraps the result in `Ok`.
Ok(textspan::remove_span_overlaps_idx(&spans))
}

fn to_tuple<T>(x: Result<T, T>) -> (T, bool) {
match x {
Ok(x) => (x, true),
Expand Down
6 changes: 6 additions & 0 deletions python/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ def test_random_get_original_spans2(tokens, text, expected):
def test_lift_span_index(span, spans, expected):
assert textspan.lift_span_index(span, spans) == expected
assert textspan.lift_spans_index([span], spans) == [expected]


# Checks both overlap-removal entry points on the same input:
# `remove_span_overlaps` returns the kept spans themselves, while
# `remove_span_overlaps_idx` returns their positions in the input list
# ((0, 3) is at index 1 and (5, 7) is at index 3).
def test_remove_span_overlaps():
spans = [(0, 2), (0, 3), (2, 4), (5, 7)]
assert textspan.remove_span_overlaps(spans) == [(0, 3), (5, 7)]
assert textspan.remove_span_overlaps_idx(spans) == [1, 3]
2 changes: 2 additions & 0 deletions python/textspan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
align_spans_by_mapping,
align_spans,
remove_span_overlaps,
remove_span_overlaps_idx,
lift_spans_index,
lift_span_index,
)
Expand All @@ -12,6 +13,7 @@
"align_spans_by_mapping",
"align_spans",
"remove_span_overlaps",
"remove_span_overlaps_idx",
"lift_span_index",
"lift_spans_index",
]
1 change: 1 addition & 0 deletions python/textspan/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ def get_original_spans(
tokens: List[str], original_text: str,
) -> List[List[Tuple[int, int]]]: ...
# Removes overlapping spans and returns the spans that are kept.
# The parameter is named `spans` (not `tokens`) to match the native binding's argument name.
def remove_span_overlaps(spans: List[Tuple[int, int]]) -> List[Tuple[int, int]]: ...
# Removes overlapping spans and returns the indices (into `spans`) of the spans that are kept.
# The parameter is named `spans` to match the native function's `text_signature = "(spans)"`.
def remove_span_overlaps_idx(spans: List[Tuple[int, int]]) -> List[int]: ...
def lift_span_index(span: Tuple[int, int], target_spans: List[Tuple[int, int]]) -> Tuple[Tuple[int, bool], Tuple[int, bool]]: ...
def lift_spans_index(spans: List[Tuple[int, int]], target_spans: List[Tuple[int, int]]) -> List[Tuple[Tuple[int, bool], Tuple[int, bool]]]: ...

0 comments on commit eb3bf75

Please sign in to comment.