Skip to content

Commit

Permalink
Fix isin bug (#16049)
Browse files (browse the repository at this point in the history)
  • Loading branch information
maurever authored Feb 1, 2024
1 parent 4e3b467 commit 71ef538
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
2 changes: 1 addition & 1 deletion h2o-py/h2o/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1845,7 +1845,7 @@ def isin(self, item):
"""
if is_type(item, list, tuple, set):
if self.ncols == 1 and (self.type(0) == 'str' or self.type(0) == 'enum'):
return self.match(item, nomatch=0)
return self.match(item, nomatch=0) > 0
else:
return functools.reduce(H2OFrame.__or__, (self == i for i in item))
else:
Expand Down
13 changes: 13 additions & 0 deletions h2o-py/tests/testdir_misc/pyunit_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,22 @@ def isin_check():
assert not (cars.isin(["AMC Gremlin","AMC Concord DL"]) == cars.isin("AMC Gremlin")).all()
assert (cars.isin(["AMC Gremlin","AMC Concord DL",6]) == cars.isin("AMC Gremlin") + cars.isin("AMC Concord DL")
+ cars.isin(6)).all()


def test_isin_issue_16043():
    """Check that ``isin`` on the titanic ``cabin`` column yields two distinct values.

    The H2O result is expected to be a binary vector, matching what pandas
    produces for the same column and the same list of levels.
    """
    titanic = h2o.upload_file(pyunit_utils.locate("smalldata/titanic/titanic_expanded.csv"))
    col_name = "cabin"

    # Use all levels of the column except the last ten
    # (presumably so that some rows fall outside the list -- confirm against the data).
    all_levels = titanic[col_name].levels()[0]
    subset = all_levels[:-10]
    assert len(subset) > 10

    print(titanic[col_name].isin(subset).unique())

    # expecting a binary vector as in pandas
    h2o_distinct = titanic[col_name].isin(subset).unique()
    assert h2o_distinct.nrows == 2

    pandas_column = titanic.as_data_frame(use_pandas=True)[col_name]
    assert pandas_column.isin(subset).nunique() == 2


if __name__ == "__main__":
    # Executed directly as a script: hand each check to the pyunit harness.
    pyunit_utils.standalone_test(isin_check)
    pyunit_utils.standalone_test(test_isin_issue_16043)
else:
    # Loaded as a module: call the checks directly.
    isin_check()
    test_isin_issue_16043()

0 comments on commit 71ef538

Please sign in to comment.