diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b4aa6447c0a1b..565cd1c54d42c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -703,6 +703,7 @@ Indexing - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) - Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) +- Bug in :meth:`Series.mask` unexpectedly filling ``pd.NA`` (:issue:`60729`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c3f535df9ce2..111d40c2f758d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -28,6 +28,7 @@ from pandas._libs import lib from pandas._libs.lib import is_range_indexer +from pandas._libs.missing import NA from pandas._libs.tslibs import ( Period, Timestamp, @@ -9701,6 +9702,7 @@ def _where( # align the cond to same shape as myself cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): + cond = cond.fillna(True) # CoW: Make sure reference is not kept alive if cond.ndim == 1 and self.ndim == 2: cond = cond._constructor_expanddim( @@ -9711,7 +9713,10 @@ def _where( cond = cond.align(self, join="right")[0] else: if not hasattr(cond, "shape"): - cond = np.asanyarray(cond) + cond = np.asanyarray(cond, dtype=object) + if not cond.flags.writeable: + cond.setflags(write=True) + cond[isna(cond)] = True if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) @@ -10095,7 +10100,17 @@ def mask( # see gh-21891 if not hasattr(cond, "__invert__"): - cond = np.array(cond) + cond = np.array(cond, dtype=object) + + if isinstance(cond, np.ndarray): + if all( + x is NA or isinstance(x, (np.bool_, bool)) or x is np.nan + for x in cond.flatten() + ): + if not cond.flags.writeable: + cond.setflags(write=True) + cond[isna(cond)] = False + cond = cond.astype(bool) return self._where( ~cond, diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 3c21cd0d5ca64..3bfade287bd05 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import Series import pandas._testing as tm @@ -67,3 +69,26 @@ def test_mask_inplace(): rs = s.copy() rs.mask(cond, -s, inplace=True) tm.assert_series_equal(rs, s.mask(cond, -s)) + + +@pytest.mark.parametrize( + "dtype", + [ + "Int64", + pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")), + ], +) +def test_mask_na(dtype): + # We should not be filling pd.NA. See GH#60729 + series = Series([None, 1, 2, None, 3, 4, None], dtype=dtype) + cond = series <= 2 + expected = Series([None, -99, -99, None, 3, 4, None], dtype=dtype) + + result = series.mask(cond, -99) + tm.assert_series_equal(result, expected) + + result = series.mask(cond.to_list(), -99) + tm.assert_series_equal(result, expected) + + result = series.mask(cond.to_numpy(), -99) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 663ee8ad0ee38..f9f3f7e0dadd5 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._libs.missing import NA + from pandas.core.dtypes.common import is_integer import pandas as pd @@ -443,3 +445,13 @@ def test_where_datetimelike_categorical(tz_naive_fixture): res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals)) tm.assert_frame_equal(res, pd.DataFrame(dr)) + + +def test_where_list_with_nan(): + ser = Series([None, 1, 2, np.nan, 3, 4, NA]) + cond = [np.nan, False, False, np.nan, True, True, np.nan] + expected = Series([None, -99, -99, np.nan, 3, 4, NA]) + + res = ser.where(cond, -99) + + tm.assert_series_equal(res, expected)