Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Support boolean masks for iloc.__getitem__ #61162

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Other enhancements
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean :class:`Series` masks with an integer index in ``__getitem__`` (previously this raised ``NotImplementedError``) for more consistent indexing behavior (:issue:`60994`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1582,11 +1582,7 @@ def _validate_key(self, key, axis: AxisInt) -> None:
if com.is_bool_indexer(key):
if hasattr(key, "index") and isinstance(key.index, Index):
if key.index.inferred_type == "integer":
raise NotImplementedError(
"iLocation based boolean "
"indexing on an integer type "
"is not available"
)
return
raise ValueError(
"iLocation based boolean indexing cannot use an indexable as a mask"
)
Expand Down
49 changes: 29 additions & 20 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,21 +726,27 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value):

@pytest.mark.filterwarnings("ignore::UserWarning")
def test_iloc_mask(self):
# GH 3631, iloc with a mask (of a series) should raise
# GH 60994, iloc with a boolean mask (of a series) with an integer index no longer raises NotImplementedError
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
mask = df.a % 2 == 0
msg = "iLocation based boolean indexing cannot use an indexable as a mask"
with pytest.raises(ValueError, match=msg):
df.iloc[mask]

mask.index = range(len(mask))
msg = "iLocation based boolean indexing on an integer type is not available"
with pytest.raises(NotImplementedError, match=msg):
msg = "Unalignable boolean Series provided as indexer"
with pytest.raises(IndexingError, match=msg):
df.iloc[mask]

# ndarray ok
result = df.iloc[np.array([True] * len(mask), dtype=bool)]
tm.assert_frame_equal(result, df)

result2 = df.iloc[np.array([True, False, True, False, True], dtype=bool)]
tm.assert_frame_equal(
result2, DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"])
)

# the possibilities
locs = np.arange(4)
nums = 2**locs
Expand All @@ -753,18 +759,13 @@ def test_iloc_mask(self):
(None, ".iloc"): "0b1100",
("index", ""): "0b11",
("index", ".loc"): "0b11",
("index", ".iloc"): (
"iLocation based boolean indexing cannot use an indexable as a mask"
),
("locs", ""): "Unalignable boolean Series provided as indexer "
"(index of the boolean Series and of the indexed "
"object do not match).",
("locs", ".loc"): "Unalignable boolean Series provided as indexer "
"(index of the boolean Series and of the "
"indexed object do not match).",
("locs", ".iloc"): (
"iLocation based boolean indexing on an integer type is not available"
),
(
"index",
".iloc",
): "iLocation based boolean indexing cannot use an indexable as a mask",
("locs", ""): "Unalignable boolean Series provided as indexer",
("locs", ".loc"): "Unalignable boolean Series provided as indexer",
("locs", ".iloc"): "Unalignable boolean Series provided as indexer",
}

# UserWarnings from reindex of a boolean mask
Expand All @@ -780,18 +781,26 @@ def test_iloc_mask(self):
else:
accessor = df
answer = str(bin(accessor[mask]["nums"].sum()))
except (ValueError, IndexingError, NotImplementedError) as err:
except (ValueError, IndexingError) as err:
answer = str(err)

key = (
idx,
method,
)
r = expected.get(key)
if r != answer:
raise AssertionError(
f"[{key}] does not match [{answer}], received [{r}]"
expected_result = expected.get(key)

# Compare the outcome against the expected value for this (idx, method) pair
if (
idx is None or (idx == "index" and method != ".iloc")
) and "0b" in expected_result:
# For successful numeric results, exact match is needed
assert expected_result == answer, (
f"[{key}] does not match [{answer}]"
)
else:
# For error messages, substring match is sufficient
assert expected_result in answer, f"[{key}] not found in [{answer}]"

def test_iloc_non_unique_indexing(self):
# GH 4017, non-unique indexing (on the axis)
Expand Down