Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix binary operations on attrs for Series and DataFrame #59636

Merged
merged 37 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
38efbaa
Fix binary operators on attrs for Series and DataFrame
fbourgey Aug 27, 2024
15db834
Add tests for Series and DataFrame for attrs binary operations
fbourgey Aug 28, 2024
50118d7
Add getattr as other might not possess attrs as attribute
fbourgey Sep 5, 2024
953ef17
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Sep 5, 2024
bc4da65
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Sep 6, 2024
2607f9c
Fix some tests in pandas/tests/generic/test_finalize.py::test_binops
fbourgey Sep 11, 2024
b154203
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Sep 11, 2024
87abede
remove xfail tests related to attrs
fbourgey Sep 13, 2024
8559c1f
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Sep 13, 2024
742e3b1
Add all_binary_operators
fbourgey Sep 24, 2024
5178113
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Sep 24, 2024
5b71171
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Dec 1, 2024
8da62bd
Merge branch 'pandas-dev:main' into series-sum-attrs
fbourgey Dec 2, 2024
c2c87dd
use __finalize__ for attrs bin ops in base.py
fbourgey Dec 2, 2024
4da1786
use __finalize__ for attrs bin ops in frame.py
fbourgey Dec 2, 2024
6436926
use __finalize__ for attrs bin ops in series.py
fbourgey Dec 2, 2024
50396ad
Merge branch 'main' into series-sum-attrs
fbourgey Jan 29, 2025
76b0831
Merge branch 'pandas-dev:main' into series-sum-attrs
fbourgey Jan 30, 2025
ef3c0b5
ENH: Ensure attrs are copied from other in Series arithmetic operatio…
fbourgey Feb 1, 2025
e92e431
Merge branch 'main' into series-sum-attrs
fbourgey Feb 1, 2025
311b662
Merge branch 'main' into series-sum-attrs
fbourgey Feb 1, 2025
95cb745
Merge branch 'pandas-dev:main' into series-sum-attrs
fbourgey Feb 3, 2025
c162bba
Merge branch 'main' into series-sum-attrs
fbourgey Feb 3, 2025
bd46491
Merge branch 'pandas-dev:main' into series-sum-attrs
fbourgey Feb 9, 2025
bf898c7
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Mar 29, 2025
e9c3e91
REF: Refactor binary operation tests for DataFrame and Series attributes
fbourgey Mar 30, 2025
b72a049
REF: Enhance _construct_result method to accept 'other' parameter for…
fbourgey Mar 30, 2025
3abc22d
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Mar 30, 2025
1e6197c
REF: Simplify test_attrs_binary_operations by parameterizing left and…
fbourgey Apr 1, 2025
3a80cf9
Refactor DataFrame and Series methods to improve attribute handling a…
fbourgey Apr 1, 2025
83e908f
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Apr 1, 2025
e342a6e
Refactor DataFrame alignment logic to improve attribute consistency
fbourgey Apr 2, 2025
c251196
Refactor DataFrame and Series methods to enhance attribute finalizati…
fbourgey Apr 2, 2025
96ddd0d
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Apr 2, 2025
155a9d1
Clean and remove unnecessry checks
fbourgey Apr 3, 2025
45e2ad0
Fix: Prioritizing False if self.flags.allows_duplicate_labels or othe…
fbourgey Apr 3, 2025
cf67fcf
Merge remote-tracking branch 'upstream/main' into series-sum-attrs
fbourgey Apr 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,9 +1480,9 @@ def _arith_method(self, other, op):
with np.errstate(all="ignore"):
result = ops.arithmetic_op(lvalues, rvalues, op)

return self._construct_result(result, name=res_name)
return self._construct_result(result, name=res_name, other=other)

def _construct_result(self, result, name):
def _construct_result(self, result, name, other):
"""
Construct an appropriately-wrapped result from the ArrayLike result
of an arithmetic-like operation.
Expand Down
17 changes: 8 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7879,7 +7879,7 @@ def _cmp_method(self, other, op):

# See GH#4537 for discussion of scalar op behavior
new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)
return self._construct_result(new_data, other=other)

def _arith_method(self, other, op):
if self._should_reindex_frame_op(other, op, 1, None, None):
Expand All @@ -7892,7 +7892,7 @@ def _arith_method(self, other, op):

with np.errstate(all="ignore"):
new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)
return self._construct_result(new_data, other=other)

_logical_method = _arith_method

Expand Down Expand Up @@ -8088,8 +8088,7 @@ def _align_for_op(

Parameters
----------
left : DataFrame
right : Any
other : Any
axis : int
flex : bool or None, default False
Whether this is a flex op, in which case we reindex.
Expand Down Expand Up @@ -8208,7 +8207,6 @@ def to_series(right):
level=level,
)
right = left._maybe_align_series_as_frame(right, axis)
Copy link
Contributor Author

@fbourgey fbourgey Apr 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this resets the attrs of right

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would consider that a bug. attrs should be preserved in this function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I fix it in this PR or raise a different issue?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can fix it in this PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested something below


return left, right

def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
Expand Down Expand Up @@ -8237,7 +8235,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
index=self.index,
columns=self.columns,
dtype=rvalues.dtype,
)
).__finalize__(series)

def _flex_arith_method(
self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
Expand Down Expand Up @@ -8269,9 +8267,9 @@ def _flex_arith_method(

new_data = self._dispatch_frame_op(other, op)

return self._construct_result(new_data)
return self._construct_result(new_data, other=other)

def _construct_result(self, result) -> DataFrame:
def _construct_result(self, result, other) -> DataFrame:
"""
Wrap the result of an arithmetic, comparison, or logical operation.

Expand All @@ -8288,6 +8286,7 @@ def _construct_result(self, result) -> DataFrame:
# non-unique columns case
out.columns = self.columns
out.index = self.index
out = out.__finalize__(other)
return out

def __divmod__(self, other) -> tuple[DataFrame, DataFrame]:
Expand All @@ -8308,7 +8307,7 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None):
self, other = self._align_for_op(other, axis, flex=True, level=level)

new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)
return self._construct_result(new_data, other=other)

@Appender(ops.make_flex_doc("eq", "dataframe"))
def eq(self, other, axis: Axis = "columns", level=None) -> DataFrame:
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6076,8 +6076,10 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
# One could make the deepcopy unconditionally, but a deepcopy
# of an empty dict is 50x more expensive than the empty check.
self.attrs = deepcopy(other.attrs)

self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels
self.flags.allows_duplicate_labels = (
self.flags.allows_duplicate_labels
and other.flags.allows_duplicate_labels
)
# For subclasses using _metadata.
for name in set(self._metadata) & set(other._metadata):
assert isinstance(name, str)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7148,10 +7148,10 @@ def _logical_method(self, other, op):
rvalues = extract_array(other, extract_numpy=True, extract_range=True)

res_values = ops.logical_op(lvalues, rvalues, op)
return self._construct_result(res_values, name=res_name)
return self._construct_result(res_values, name=res_name, other=other)

@final
def _construct_result(self, result, name):
def _construct_result(self, result, name, other):
if isinstance(result, tuple):
return (
Index(result[0], name=name, dtype=result[0].dtype),
Expand Down
18 changes: 12 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5858,7 +5858,7 @@ def _cmp_method(self, other, op):

res_values = ops.comparison_op(lvalues, rvalues, op)

return self._construct_result(res_values, name=res_name)
return self._construct_result(res_values, name=res_name, other=other)

def _logical_method(self, other, op):
res_name = ops.get_op_result_name(self, other)
Expand All @@ -5868,7 +5868,7 @@ def _logical_method(self, other, op):
rvalues = extract_array(other, extract_numpy=True, extract_range=True)

res_values = ops.logical_op(lvalues, rvalues, op)
return self._construct_result(res_values, name=res_name)
return self._construct_result(res_values, name=res_name, other=other)

def _arith_method(self, other, op):
self, other = self._align_for_op(other)
Expand Down Expand Up @@ -5930,11 +5930,15 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series:
result = func(this_vals, other_vals)

name = ops.get_op_result_name(self, other)
out = this._construct_result(result, name)

out = this._construct_result(result, name, other)
return cast(Series, out)

def _construct_result(
self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable
self,
result: ArrayLike | tuple[ArrayLike, ArrayLike],
name: Hashable,
other: AnyArrayLike | DataFrame,
) -> Series | tuple[Series, Series]:
"""
Construct an appropriately-labelled Series from the result of an op.
Expand All @@ -5943,6 +5947,7 @@ def _construct_result(
----------
result : ndarray or ExtensionArray
name : Label
other : Series, DataFrame or array-like

Returns
-------
Expand All @@ -5952,8 +5957,8 @@ def _construct_result(
if isinstance(result, tuple):
# produced by divmod or rdivmod

res1 = self._construct_result(result[0], name=name)
res2 = self._construct_result(result[1], name=name)
res1 = self._construct_result(result[0], name=name, other=other)
res2 = self._construct_result(result[1], name=name, other=other)

# GH#33427 assertions to keep mypy happy
assert isinstance(res1, Series)
Expand All @@ -5965,6 +5970,7 @@ def _construct_result(
dtype = getattr(result, "dtype", None)
out = self._constructor(result, index=self.index, dtype=dtype, copy=False)
out = out.__finalize__(self)
out = out.__finalize__(other)

# Set the result's name after __finalize__ is called because __finalize__
# would set it back to self.name
Expand Down
59 changes: 12 additions & 47 deletions pandas/tests/generic/test_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,53 +427,6 @@ def test_binops(request, args, annotate, all_binary_operators):
if annotate == "right" and isinstance(right, int):
pytest.skip("right is an int and doesn't support .attrs")

if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both":
if not all_binary_operators.__name__.startswith("r"):
if annotate == "right" and isinstance(left, type(right)):
request.applymarker(
pytest.mark.xfail(
reason=f"{all_binary_operators} doesn't work when right has "
f"attrs and both are {type(left)}"
)
)
if not isinstance(left, type(right)):
if annotate == "left" and isinstance(left, pd.Series):
request.applymarker(
pytest.mark.xfail(
reason=f"{all_binary_operators} doesn't work when the "
"objects are different Series has attrs"
)
)
elif annotate == "right" and isinstance(right, pd.Series):
request.applymarker(
pytest.mark.xfail(
reason=f"{all_binary_operators} doesn't work when the "
"objects are different Series has attrs"
)
)
else:
if annotate == "left" and isinstance(left, type(right)):
request.applymarker(
pytest.mark.xfail(
reason=f"{all_binary_operators} doesn't work when left has "
f"attrs and both are {type(left)}"
)
)
if not isinstance(left, type(right)):
if annotate == "right" and isinstance(right, pd.Series):
request.applymarker(
pytest.mark.xfail(
reason=f"{all_binary_operators} doesn't work when the "
"objects are different Series has attrs"
)
)
elif annotate == "left" and isinstance(left, pd.Series):
request.applymarker(
pytest.mark.xfail(
reason=f"{all_binary_operators} doesn't work when the "
"objects are different Series has attrs"
)
)
if annotate in {"left", "both"} and not isinstance(left, int):
left.attrs = {"a": 1}
if annotate in {"right", "both"} and not isinstance(right, int):
Expand All @@ -497,6 +450,18 @@ def test_binops(request, args, annotate, all_binary_operators):
assert result.attrs == {"a": 1}


@pytest.mark.parametrize("left", [pd.Series, pd.DataFrame])
@pytest.mark.parametrize("right", [pd.Series, pd.DataFrame])
def test_attrs_binary_operations(all_binary_operators, left, right):
# GH 51607
attrs = {"a": 1}
left = left([1])
left.attrs = attrs
right = right([2])
assert all_binary_operators(left, right).attrs == attrs
assert all_binary_operators(right, left).attrs == attrs


# ----------------------------------------------------------------------------
# Accessors

Expand Down