From 38efbaa52beb0aeecc4b488281b187a281ee7d70 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 27 Aug 2024 19:39:28 -0400 Subject: [PATCH 01/18] Fix binary operators on attrs for Series and DataFrame --- pandas/core/base.py | 3 +++ pandas/core/frame.py | 7 +++++++ pandas/core/series.py | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/pandas/core/base.py b/pandas/core/base.py index 863cf978426e2..53117f6ba3f20 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1466,6 +1466,9 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): result = ops.arithmetic_op(lvalues, rvalues, op) + if not self.attrs and other.attrs: + self.attrs = other.attrs + return self._construct_result(result, name=res_name) def _construct_result(self, result, name): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b84fb33af26e5..b4de8cd0df9cc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7830,6 +7830,10 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): new_data = self._dispatch_frame_op(other, op, axis=axis) + + if not self.attrs and other.attrs: + self.attrs = other.attrs + return self._construct_result(new_data) _logical_method = _arith_method @@ -8185,6 +8189,9 @@ def _flex_arith_method( new_data = self._dispatch_frame_op(other, op) + if not self.attrs and other.attrs: + self.attrs = other.attrs + return self._construct_result(new_data) def _construct_result(self, result) -> DataFrame: diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f79e30f48f3c..83c932c0534d2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5916,6 +5916,10 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: result = func(this_vals, other_vals) name = ops.get_op_result_name(self, other) + + if not this.attrs and other.attrs: + this.attrs = other.attrs + out = this._construct_result(result, name) return cast(Series, out) From 15db834d6f1b7b64a9aec4da13285182a3cd20c9 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 27 Aug 2024 20:06:37 -0400 Subject: [PATCH 02/18] Add tests for Series and DataFrame for attrs binary operations --- pandas/tests/frame/test_api.py | 16 ++++++++++++++++ pandas/tests/series/test_api.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index f8219e68a72da..66077bde776e9 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -324,6 +324,22 @@ def test_attrs_deepcopy(self): assert result.attrs == df.attrs assert result.attrs["tags"] is not df.attrs["tags"] + def test_attrs_binary_operations(self): + # GH 51607 + df_1 = DataFrame({"A": [2, 3]}) + df_2 = DataFrame({"A": [-3, 9]}) + attrs = {"info": "DataFrame"} + df_1.attrs = attrs + assert (df_1 + df_2).attrs == attrs + assert (df_2 + df_1).attrs == attrs + assert (df_2 - df_1).attrs == attrs + assert (df_2 / df_1).attrs == attrs + assert (df_2 * df_1).attrs == attrs + assert (df_2.add(df_1)).attrs == attrs + assert (df_2.sub(df_1)).attrs == attrs + assert (df_2.div(df_1)).attrs == attrs + assert (df_2.mul(df_1)).attrs == attrs + @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags( self, diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 79a55eb357f87..0e5b14310c114 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -164,6 +164,22 @@ def test_attrs(self): result = s + 1 assert result.attrs == {"version": 1} + def test_attrs_binary_operations(self): + # GH 51607 + s1 = Series([2, 5]) + s2 = Series([7, -1]) + attrs = {"info": "Series"} + s1.attrs = attrs + assert (s1 + s2).attrs == attrs + assert (s2 + s1).attrs == attrs + assert (s2 - s1).attrs == attrs + assert (s2 / s1).attrs == attrs + assert (s2 * s1).attrs == attrs + assert (s2.add(s1)).attrs == attrs + assert (s2.sub(s1)).attrs == attrs + assert (s2.div(s1)).attrs == attrs + assert (s2.mul(s1)).attrs == attrs + @pytest.mark.xfail( using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" ) From 50118d726b06f45effb2154db3c8925ad09285b5 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Thu, 5 Sep 2024 09:29:19 -0400 Subject: [PATCH 03/18] Add getattr as other might not possess attrs as attribute --- pandas/core/base.py | 2 +- pandas/core/frame.py | 4 ++-- pandas/core/series.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 53117f6ba3f20..8b95cdcfcbebe 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1466,7 +1466,7 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): result = ops.arithmetic_op(lvalues, rvalues, op) - if not self.attrs and other.attrs: + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): self.attrs = other.attrs return self._construct_result(result, name=res_name) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4de8cd0df9cc..23c1dee115aa1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7831,7 +7831,7 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): new_data = self._dispatch_frame_op(other, op, axis=axis) - if not self.attrs and other.attrs: + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): self.attrs = other.attrs return self._construct_result(new_data) @@ -8189,7 +8189,7 @@ def _flex_arith_method( new_data = self._dispatch_frame_op(other, op) - if not self.attrs and other.attrs: + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): self.attrs = other.attrs return self._construct_result(new_data) diff --git a/pandas/core/series.py b/pandas/core/series.py index 83c932c0534d2..5d34af4993d80 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5917,7 +5917,7 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: name = ops.get_op_result_name(self, other) - if not this.attrs and other.attrs: + if not getattr(this, "attrs", None) and getattr(other, "attrs", None): this.attrs = other.attrs out = this._construct_result(result, name) From 2607f9cfe6b96332f64158990b56334c32bedaa6 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Wed, 11 Sep 2024 10:45:07 -0400 Subject: [PATCH 04/18] Fix some tests in pandas/tests/generic/test_finalize.py::test_binops --- pandas/core/base.py | 5 ++--- pandas/core/frame.py | 13 +++++++++---- pandas/core/series.py | 5 +++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 8b95cdcfcbebe..d9472bdd13c36 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1454,6 +1454,8 @@ def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: return algorithms.duplicated(arr, keep=keep) def _arith_method(self, other, op): + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs res_name = ops.get_op_result_name(self, other) lvalues = self._values @@ -1466,9 +1468,6 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): result = ops.arithmetic_op(lvalues, rvalues, op) - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs - return self._construct_result(result, name=res_name) def _construct_result(self, result, name): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce3e855f85bd5..a24afd9a63235 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7813,6 +7813,9 @@ class diet def _cmp_method(self, other, op): axis: Literal[1] = 1 # only relevant for Series other case + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs + self, other = self._align_for_op(other, axis, flex=False, level=None) # See GH#4537 for discussion of scalar op behavior @@ -7820,6 +7823,9 @@ def _cmp_method(self, other, op): return self._construct_result(new_data) def _arith_method(self, other, op): + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs + if self._should_reindex_frame_op(other, op, 1, None, None): return self._arith_method_with_reindex(other, op) @@ -7830,10 +7836,6 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): new_data = self._dispatch_frame_op(other, op, axis=axis) - - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs - return self._construct_result(new_data) _logical_method = _arith_method @@ -8230,6 +8232,9 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): self, other = self._align_for_op(other, axis, flex=True, level=level) + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs + new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data) diff --git a/pandas/core/series.py b/pandas/core/series.py index 5d34af4993d80..fde497bb977fd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5836,6 +5836,9 @@ def to_period( def _cmp_method(self, other, op): res_name = ops.get_op_result_name(self, other) + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs + if isinstance(other, Series) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") @@ -5847,6 +5850,8 @@ def _cmp_method(self, other, op): return self._construct_result(res_values, name=res_name) def _logical_method(self, other, op): + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs res_name = ops.get_op_result_name(self, other) self, other = self._align_for_op(other, align_asobject=True) From 87abede1f4a78f23ad9b0e32418aa4712ded22a6 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Fri, 13 Sep 2024 10:11:49 -0400 Subject: [PATCH 05/18] remove xfail tests related to attrs --- pandas/tests/generic/test_finalize.py | 47 --------------------------- 1 file changed, 47 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 433e559ef620e..d48baf46e50c5 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -427,53 +427,6 @@ def test_binops(request, args, annotate, all_binary_operators): if annotate == "right" and isinstance(right, int): pytest.skip("right is an int and doesn't support .attrs") - if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both": - if not all_binary_operators.__name__.startswith("r"): - if annotate == "right" and isinstance(left, type(right)): - request.applymarker( - pytest.mark.xfail( - reason=f"{all_binary_operators} doesn't work when right has " - f"attrs and both are {type(left)}" - ) - ) - if not isinstance(left, type(right)): - if annotate == "left" and isinstance(left, pd.Series): - request.applymarker( - pytest.mark.xfail( - reason=f"{all_binary_operators} doesn't work when the " - "objects are different Series has attrs" - ) - ) - elif annotate == "right" and isinstance(right, pd.Series): - request.applymarker( - pytest.mark.xfail( - reason=f"{all_binary_operators} doesn't work when the " - "objects are different Series has attrs" - ) - ) - else: - if annotate == "left" and isinstance(left, type(right)): - request.applymarker( - pytest.mark.xfail( - reason=f"{all_binary_operators} doesn't work when left has " - f"attrs and both are {type(left)}" - ) - ) - if not isinstance(left, type(right)): - if annotate == "right" and isinstance(right, pd.Series): - request.applymarker( - pytest.mark.xfail( - reason=f"{all_binary_operators} doesn't work when the " - "objects are different Series has attrs" - ) - ) - elif annotate == "left" and isinstance(left, pd.Series): - request.applymarker( - pytest.mark.xfail( - reason=f"{all_binary_operators} doesn't work when the " - "objects are different Series has attrs" - ) - ) if annotate in {"left", "both"} and not isinstance(left, int): left.attrs = {"a": 1} if annotate in {"right", "both"} and not isinstance(right, int): From 742e3b1fc1f0f52fa295f0f66dac4d7e0b274aec Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 24 Sep 2024 09:51:37 -0400 Subject: [PATCH 06/18] Add all_binary_operators --- pandas/tests/frame/test_api.py | 17 +++++------------ pandas/tests/series/test_api.py | 17 +++++------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index e6fcf7b5042e3..725cc106abc72 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -324,21 +324,14 @@ def test_attrs_deepcopy(self): assert result.attrs == df.attrs assert result.attrs["tags"] is not df.attrs["tags"] - def test_attrs_binary_operations(self): + def test_attrs_binary_operations(self, all_binary_operators): # GH 51607 - df_1 = DataFrame({"A": [2, 3]}) - df_2 = DataFrame({"A": [-3, 9]}) + df_1 = DataFrame([1]) + df_2 = DataFrame([2]) attrs = {"info": "DataFrame"} df_1.attrs = attrs - assert (df_1 + df_2).attrs == attrs - assert (df_2 + df_1).attrs == attrs - assert (df_2 - df_1).attrs == attrs - assert (df_2 / df_1).attrs == attrs - assert (df_2 * df_1).attrs == attrs - assert (df_2.add(df_1)).attrs == attrs - assert (df_2.sub(df_1)).attrs == attrs - assert (df_2.div(df_1)).attrs == attrs - assert (df_2.mul(df_1)).attrs == attrs + assert all_binary_operators(df_1, df_2).attrs == attrs + assert all_binary_operators(df_2, df_1).attrs == attrs @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags( diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 0e5b14310c114..4daa686c0b635 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -164,21 +164,14 @@ def test_attrs(self): result = s + 1 assert result.attrs == {"version": 1} - def test_attrs_binary_operations(self): + def test_attrs_binary_operations(self, all_binary_operators): # GH 51607 - s1 = Series([2, 5]) - s2 = Series([7, -1]) + s1 = Series([1]) + s2 = Series([2]) attrs = {"info": "Series"} s1.attrs = attrs - assert (s1 + s2).attrs == attrs - assert (s2 + s1).attrs == attrs - assert (s2 - s1).attrs == attrs - assert (s2 / s1).attrs == attrs - assert (s2 * s1).attrs == attrs - assert (s2.add(s1)).attrs == attrs - assert (s2.sub(s1)).attrs == attrs - assert (s2.div(s1)).attrs == attrs - assert (s2.mul(s1)).attrs == attrs + assert all_binary_operators(s1, s2).attrs == attrs + assert all_binary_operators(s2, s1).attrs == attrs @pytest.mark.xfail( using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" From c2c87ddf1d10aaa11233aecd8286375f6afa96a4 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Mon, 2 Dec 2024 17:08:47 -0500 Subject: [PATCH 07/18] use __finalize__ for attrs bin ops in base.py --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 28d90dae060e3..98d3d470da2c7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1464,7 +1464,7 @@ def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: def _arith_method(self, other, op): if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs + self.__finalize__(other) res_name = ops.get_op_result_name(self, other) lvalues = self._values From 4da178638fcf456f4b553b5b529a22a9f1409133 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Mon, 2 Dec 2024 17:08:57 -0500 Subject: [PATCH 08/18] use __finalize__ for attrs bin ops in frame.py --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4222acfabd28..01d5a14c4a38e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7829,7 +7829,7 @@ def _cmp_method(self, other, op): axis: Literal[1] = 1 # only relevant for Series other case if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs + self.__finalize__(other) self, other = self._align_for_op(other, axis, flex=False, level=None) @@ -7839,7 +7839,7 @@ def _cmp_method(self, other, op): def _arith_method(self, other, op): if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs + self.__finalize__(other) if self._should_reindex_frame_op(other, op, 1, None, None): return self._arith_method_with_reindex(other, op) @@ -8207,7 +8207,7 @@ def _flex_arith_method( new_data = self._dispatch_frame_op(other, op) if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs + self.__finalize__(other) return self._construct_result(new_data) @@ -8248,7 +8248,7 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): self, other = self._align_for_op(other, axis, flex=True, level=level) if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs + self.__finalize__(other) new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data) From 643692696b51f265def93a9d9decaf86e450f151 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Mon, 2 Dec 2024 17:09:07 -0500 Subject: [PATCH 09/18] use __finalize__ for attrs bin ops in series.py --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 457aad6edc875..21e01b2d7dc99 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5862,7 +5862,7 @@ def _cmp_method(self, other, op): res_name = ops.get_op_result_name(self, other) if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs + self.__finalize__(other) if isinstance(other, Series) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") @@ -5948,7 +5948,7 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: name = ops.get_op_result_name(self, other) if not getattr(this, "attrs", None) and getattr(other, "attrs", None): - this.attrs = other.attrs + this.__finalize__(other) out = this._construct_result(result, name) return cast(Series, out) From ef3c0b5843116f216ce8264ee5e8b67f393fe6ff Mon Sep 17 00:00:00 2001 From: fbourgey Date: Sat, 1 Feb 2025 12:11:03 -0500 Subject: [PATCH 10/18] ENH: Ensure attrs are copied from other in Series arithmetic operations. Moved from base.py to series.py --- pandas/core/base.py | 2 -- pandas/core/series.py | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 98d3d470da2c7..61a7c079d87f8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1463,8 +1463,6 @@ def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: return algorithms.duplicated(arr, keep=keep) def _arith_method(self, other, op): - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) res_name = ops.get_op_result_name(self, other) lvalues = self._values diff --git a/pandas/core/series.py b/pandas/core/series.py index 21e01b2d7dc99..ae9037c7ccf7c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5888,6 +5888,8 @@ def _logical_method(self, other, op): def _arith_method(self, other, op): self, other = self._align_for_op(other) + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.attrs = other.attrs return base.IndexOpsMixin._arith_method(self, other, op) def _align_for_op(self, right, align_asobject: bool = False): From e9c3e910b77e9c5ea877ac787f48c6a4c2c9fe3b Mon Sep 17 00:00:00 2001 From: fbourgey Date: Sun, 30 Mar 2025 14:29:04 -0400 Subject: [PATCH 11/18] REF: Refactor binary operation tests for DataFrame and Series attributes --- pandas/tests/frame/test_api.py | 9 --------- pandas/tests/generic/test_finalize.py | 16 ++++++++++++++++ pandas/tests/series/test_api.py | 9 --------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 0e325a5cb6e85..2b0bf1b0576f9 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -324,15 +324,6 @@ def test_attrs_deepcopy(self): assert result.attrs == df.attrs assert result.attrs["tags"] is not df.attrs["tags"] - def test_attrs_binary_operations(self, all_binary_operators): - # GH 51607 - df_1 = DataFrame([1]) - df_2 = DataFrame([2]) - attrs = {"info": "DataFrame"} - df_1.attrs = attrs - assert all_binary_operators(df_1, df_2).attrs == attrs - assert all_binary_operators(df_2, df_1).attrs == attrs - @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags( self, diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 36b4a37fd40fe..97957822fe70a 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -450,6 +450,22 @@ def test_binops(request, args, annotate, all_binary_operators): assert result.attrs == {"a": 1} +@pytest.mark.parametrize( + "left, right, attrs", + [ + (pd.DataFrame([1]), pd.DataFrame([2]), {"a": 1}), + (pd.Series([1]), pd.Series([2]), {"a": 1}), + (pd.Series([1]), pd.DataFrame([2]), {"a": 1}), + (pd.DataFrame([1]), pd.Series([2]), {"a": 1}), + ], +) +def test_attrs_binary_operations(all_binary_operators, left, right, attrs): + # GH 51607 + left.attrs = attrs + assert all_binary_operators(left, right).attrs == attrs + assert all_binary_operators(right, left).attrs == attrs + + # ---------------------------------------------------------------------------- # Accessors diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index b44874ee4f673..4b369bb0bc869 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -160,15 +160,6 @@ def test_attrs(self): result = s + 1 assert result.attrs == {"version": 1} - def test_attrs_binary_operations(self, all_binary_operators): - # GH 51607 - s1 = Series([1]) - s2 = Series([2]) - attrs = {"info": "Series"} - s1.attrs = attrs - assert all_binary_operators(s1, s2).attrs == attrs - assert all_binary_operators(s2, s1).attrs == attrs - def test_inspect_getmembers(self): # GH38782 ser = Series(dtype=object) From b72a049ba711861289386e6e951f5b1805aa24bb Mon Sep 17 00:00:00 2001 From: fbourgey Date: Sun, 30 Mar 2025 14:30:34 -0400 Subject: [PATCH 12/18] REF: Enhance _construct_result method to accept 'other' parameter for improved attribute handling --- pandas/core/base.py | 4 ++-- pandas/core/frame.py | 18 +++++++----------- pandas/core/indexes/base.py | 4 ++-- pandas/core/series.py | 27 +++++++++++---------------- 4 files changed, 22 insertions(+), 31 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index a64cd8633c1db..927fb1e23365a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1480,9 +1480,9 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): result = ops.arithmetic_op(lvalues, rvalues, op) - return self._construct_result(result, name=res_name) + return self._construct_result(result, name=res_name, other=other) - def _construct_result(self, result, name): + def _construct_result(self, result, name, other=None): """ Construct an appropriately-wrapped result from the ArrayLike result of an arithmetic-like operation. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0fb148e4218de..127bb7545dfc8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7882,7 +7882,7 @@ def _cmp_method(self, other, op): # See GH#4537 for discussion of scalar op behavior new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) + return self._construct_result(new_data, other=other) def _arith_method(self, other, op): if not getattr(self, "attrs", None) and getattr(other, "attrs", None): @@ -7898,7 +7898,7 @@ def _arith_method(self, other, op): with np.errstate(all="ignore"): new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) + return self._construct_result(new_data, other=other) _logical_method = _arith_method @@ -8275,12 +8275,9 @@ def _flex_arith_method( new_data = self._dispatch_frame_op(other, op) - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) + return self._construct_result(new_data, other=other) - return self._construct_result(new_data) - - def _construct_result(self, result) -> DataFrame: + def _construct_result(self, result, other=None) -> DataFrame: """ Wrap the result of an arithmetic, comparison, or logical operation. @@ -8292,6 +8289,8 @@ def _construct_result(self, result) -> DataFrame: ------- DataFrame """ + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.__finalize__(other) out = self._constructor(result, copy=False).__finalize__(self) # Pin columns instead of passing to constructor for compat with # non-unique columns case @@ -8316,11 +8315,8 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): self, other = self._align_for_op(other, axis, flex=True, level=level) - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) - new_data = self._dispatch_frame_op(other, op, axis=axis) - return self._construct_result(new_data) + return self._construct_result(new_data, other=other) @Appender(ops.make_flex_doc("eq", "dataframe")) def eq(self, other, axis: Axis = "columns", level=None) -> DataFrame: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2079ff8fd2873..c8a6f6a603947 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7148,10 +7148,10 @@ def _logical_method(self, other, op): rvalues = extract_array(other, extract_numpy=True, extract_range=True) res_values = ops.logical_op(lvalues, rvalues, op) - return self._construct_result(res_values, name=res_name) + return self._construct_result(res_values, name=res_name, other=other) @final - def _construct_result(self, result, name): + def _construct_result(self, result, name, other=None): if isinstance(result, tuple): return ( Index(result[0], name=name, dtype=result[0].dtype), diff --git a/pandas/core/series.py b/pandas/core/series.py index c58a523ba3ebc..672260e524726 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5850,9 +5850,6 @@ def to_period( def _cmp_method(self, other, op): res_name = ops.get_op_result_name(self, other) - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) - if isinstance(other, Series) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") @@ -5861,11 +5858,9 @@ def _cmp_method(self, other, op): res_values = ops.comparison_op(lvalues, rvalues, op) - return self._construct_result(res_values, name=res_name) + return self._construct_result(res_values, name=res_name, other=other) def _logical_method(self, other, op): - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs res_name = ops.get_op_result_name(self, other) self, other = self._align_for_op(other, align_asobject=True) @@ -5873,12 +5868,10 @@ def _logical_method(self, other, op): rvalues = extract_array(other, extract_numpy=True, extract_range=True) res_values = ops.logical_op(lvalues, rvalues, op) - return self._construct_result(res_values, name=res_name) + return self._construct_result(res_values, name=res_name, other=other) def _arith_method(self, other, op): self, other = self._align_for_op(other) - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.attrs = other.attrs return base.IndexOpsMixin._arith_method(self, other, op) def _align_for_op(self, right, align_asobject: bool = False): @@ -5938,14 +5931,14 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: name = ops.get_op_result_name(self, other) - if not getattr(this, "attrs", None) and getattr(other, "attrs", None): - this.__finalize__(other) - - out = this._construct_result(result, name) + out = this._construct_result(result, name, other) return cast(Series, out) def _construct_result( - self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable + self, + result: ArrayLike | tuple[ArrayLike, ArrayLike], + name: Hashable, + other=None, ) -> Series | tuple[Series, Series]: """ Construct an appropriately-labelled Series from the result of an op. @@ -5960,11 +5953,13 @@ def _construct_result( Series In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. """ + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.__finalize__(other) if isinstance(result, tuple): # produced by divmod or rdivmod - res1 = self._construct_result(result[0], name=name) - res2 = self._construct_result(result[1], name=name) + res1 = self._construct_result(result[0], name=name, other=other) + res2 = self._construct_result(result[1], name=name, other=other) # GH#33427 assertions to keep mypy happy assert isinstance(res1, Series) From 1e6197c4de23dece6b7770a0e19a1797a0574262 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 1 Apr 2025 17:37:34 -0400 Subject: [PATCH 13/18] REF: Simplify test_attrs_binary_operations by parameterizing left and right inputs --- pandas/tests/generic/test_finalize.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 97957822fe70a..4b841b54c488b 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -450,18 +450,14 @@ def test_binops(request, args, annotate, all_binary_operators): assert result.attrs == {"a": 1} -@pytest.mark.parametrize( - "left, right, attrs", - [ - (pd.DataFrame([1]), pd.DataFrame([2]), {"a": 1}), - (pd.Series([1]), pd.Series([2]), {"a": 1}), - (pd.Series([1]), pd.DataFrame([2]), {"a": 1}), - (pd.DataFrame([1]), pd.Series([2]), {"a": 1}), - ], -) -def test_attrs_binary_operations(all_binary_operators, left, right, attrs): +@pytest.mark.parametrize("left", [pd.Series, pd.DataFrame]) +@pytest.mark.parametrize("right", [pd.Series, pd.DataFrame]) +def test_attrs_binary_operations(all_binary_operators, left, right): # GH 51607 + attrs = {"a": 1} + left = left([1]) left.attrs = attrs + right = right([2]) assert all_binary_operators(left, right).attrs == attrs assert all_binary_operators(right, left).attrs == attrs From 3a80cf909c45424a35cb2826ec2b28a4d1926dd5 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Tue, 1 Apr 2025 17:37:40 -0400 Subject: [PATCH 14/18] Refactor DataFrame and Series methods to improve attribute handling and finalize behavior --- pandas/core/frame.py | 9 +++------ pandas/core/series.py | 7 ++++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 127bb7545dfc8..b2a26c26091df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7875,9 +7875,6 @@ class diet def _cmp_method(self, other, op): axis: Literal[1] = 1 # only relevant for Series other case - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) - self, other = self._align_for_op(other, axis, flex=False, level=None) # See GH#4537 for discussion of scalar op behavior @@ -7885,9 +7882,6 @@ def _cmp_method(self, other, op): return self._construct_result(new_data, other=other) def _arith_method(self, other, op): - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) - if self._should_reindex_frame_op(other, op, 1, None, None): return self._arith_method_with_reindex(other, op) @@ -8107,6 +8101,9 @@ def _align_for_op( left : DataFrame right : Any """ + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + self.__finalize__(other) + left, right = self, other def to_series(right): diff --git a/pandas/core/series.py b/pandas/core/series.py index 672260e524726..1af9ffa9c75dd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5938,7 +5938,7 @@ def _construct_result( self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable, - other=None, + other: AnyArrayLike | DataFrame | None = None, ) -> Series | tuple[Series, Series]: """ Construct an appropriately-labelled Series from the result of an op. @@ -5947,14 +5947,13 @@ def _construct_result( ---------- result : ndarray or ExtensionArray name : Label + other : Series, DataFrame or array-like, default None Returns ------- Series In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. """ - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) if isinstance(result, tuple): # produced by divmod or rdivmod @@ -5975,6 +5974,8 @@ def _construct_result( # Set the result's name after __finalize__ is called because __finalize__ # would set it back to self.name out.name = name + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + out.__finalize__(other) return out def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): From e342a6eb11611e4ee1e050d78eb6edd1fc670904 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Wed, 2 Apr 2025 08:10:09 -0400 Subject: [PATCH 15/18] Refactor DataFrame alignment logic to improve attribute consistency --- pandas/core/frame.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b2a26c26091df..8591a76f2a6f1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8088,8 +8088,7 @@ def _align_for_op( Parameters ---------- - left : DataFrame - right : Any + other : Any axis : int flex : bool or None, default False Whether this is a flex op, in which case we reindex. @@ -8101,8 +8100,6 @@ def _align_for_op( left : DataFrame right : Any """ - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) left, right = self, other @@ -8203,7 +8200,6 @@ def to_series(right): "`left, right = left.align(right, axis=1)` " "before operating." ) - left, right = left.align( right, join="outer", @@ -8212,6 +8208,9 @@ def to_series(right): ) right = left._maybe_align_series_as_frame(right, axis) + # Ensure attributes are consistent between the aligned and original objects + if right.attrs != getattr(other, "attrs", {}): + right.attrs = getattr(other, "attrs", {}).copy() return left, right def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): From c2511968e875746f483e7fbdf064d341de9a88e1 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Wed, 2 Apr 2025 13:59:01 -0400 Subject: [PATCH 16/18] Refactor DataFrame and Series methods to enhance attribute finalization logic --- pandas/core/frame.py | 10 +++------- pandas/core/series.py | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8591a76f2a6f1..fc132f74a36e7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8207,10 +8207,6 @@ def to_series(right): level=level, ) right = left._maybe_align_series_as_frame(right, axis) - - # Ensure attributes are consistent between the aligned and original objects - if right.attrs != getattr(other, "attrs", {}): - right.attrs = getattr(other, "attrs", {}).copy() return left, right def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): @@ -8239,7 +8235,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): index=self.index, columns=self.columns, dtype=rvalues.dtype, - ) + ).__finalize__(series) def _flex_arith_method( self, other, op, *, axis: Axis = "columns", level=None, fill_value=None @@ -8285,13 +8281,13 @@ def _construct_result(self, result, other=None) -> DataFrame: ------- DataFrame """ - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - self.__finalize__(other) out = self._constructor(result, copy=False).__finalize__(self) # Pin columns instead of passing to constructor for compat with # non-unique columns case out.columns = self.columns out.index = self.index + if not getattr(self, "attrs", None) and getattr(other, "attrs", None): + out = out.__finalize__(other) return out def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: diff --git a/pandas/core/series.py b/pandas/core/series.py index 1af9ffa9c75dd..409c195a87549 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5970,12 +5970,12 @@ def _construct_result( dtype = getattr(result, "dtype", None) out = self._constructor(result, index=self.index, dtype=dtype, copy=False) out = out.__finalize__(self) + if getattr(other, "attrs", None): + out.__finalize__(other) # Set the result's name after __finalize__ is called because __finalize__ # would set it back to self.name out.name = name - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - out.__finalize__(other) return out def _flex_method(self, other, op, *, level=None, fill_value=None, axis: Axis = 0): From 155a9d193e4b8d8499320b447a929605bcc02a68 Mon Sep 17 00:00:00 2001 From: fbourgey Date: Thu, 3 Apr 2025 13:57:50 -0400 Subject: [PATCH 17/18] Clean and remove unnecessry checks --- pandas/core/base.py | 2 +- pandas/core/frame.py | 7 +++---- pandas/core/indexes/base.py | 2 +- pandas/core/series.py | 7 +++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 927fb1e23365a..948a80ed10170 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1482,7 +1482,7 @@ def _arith_method(self, other, op): return self._construct_result(result, name=res_name, other=other) - def _construct_result(self, result, name, other=None): + def _construct_result(self, result, name, other): """ Construct an appropriately-wrapped result from the ArrayLike result of an arithmetic-like operation. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fc132f74a36e7..e8e5b61f31840 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8100,7 +8100,6 @@ def _align_for_op( left : DataFrame right : Any """ - left, right = self, other def to_series(right): @@ -8200,6 +8199,7 @@ def to_series(right): "`left, right = left.align(right, axis=1)` " "before operating." ) + left, right = left.align( right, join="outer", @@ -8269,7 +8269,7 @@ def _flex_arith_method( return self._construct_result(new_data, other=other) - def _construct_result(self, result, other=None) -> DataFrame: + def _construct_result(self, result, other) -> DataFrame: """ Wrap the result of an arithmetic, comparison, or logical operation. @@ -8286,8 +8286,7 @@ def _construct_result(self, result, other=None) -> DataFrame: # non-unique columns case out.columns = self.columns out.index = self.index - if not getattr(self, "attrs", None) and getattr(other, "attrs", None): - out = out.__finalize__(other) + out = out.__finalize__(other) return out def __divmod__(self, other) -> tuple[DataFrame, DataFrame]: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c8a6f6a603947..ff3879018674e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7151,7 +7151,7 @@ def _logical_method(self, other, op): return self._construct_result(res_values, name=res_name, other=other) @final - def _construct_result(self, result, name, other=None): + def _construct_result(self, result, name, other): if isinstance(result, tuple): return ( Index(result[0], name=name, dtype=result[0].dtype), diff --git a/pandas/core/series.py b/pandas/core/series.py index 409c195a87549..03a2ce85a08c9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5938,7 +5938,7 @@ def _construct_result( self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable, - other: AnyArrayLike | DataFrame | None = None, + other: AnyArrayLike | DataFrame, ) -> Series | tuple[Series, Series]: """ Construct an appropriately-labelled Series from the result of an op. @@ -5947,7 +5947,7 @@ def _construct_result( ---------- result : ndarray or ExtensionArray name : Label - other : Series, DataFrame or array-like, default None + other : Series, DataFrame or array-like Returns ------- @@ -5970,8 +5970,7 @@ def _construct_result( dtype = getattr(result, "dtype", None) out = self._constructor(result, index=self.index, dtype=dtype, copy=False) out = out.__finalize__(self) - if getattr(other, "attrs", None): - out.__finalize__(other) + out = out.__finalize__(other) # Set the result's name after __finalize__ is called because __finalize__ # would set it back to self.name From 45e2ad0f6560013cb71d9bc88c74485c6453a21e Mon Sep 17 00:00:00 2001 From: fbourgey Date: Thu, 3 Apr 2025 13:58:29 -0400 Subject: [PATCH 18/18] Fix: Prioritizing False if self.flags.allows_duplicate_labels or other.flags.allows_duplicate_labels is False --- pandas/core/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 53a855252d8c0..8a06c234f91f9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6076,8 +6076,10 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self: # One could make the deepcopy unconditionally, but a deepcopy # of an empty dict is 50x more expensive than the empty check. self.attrs = deepcopy(other.attrs) - - self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels + self.flags.allows_duplicate_labels = ( + self.flags.allows_duplicate_labels + and other.flags.allows_duplicate_labels + ) # For subclasses using _metadata. for name in set(self._metadata) & set(other._metadata): assert isinstance(name, str)