Skip to content

Commit

Permalink
Fix unnecessary copy in strings_to_categoricals (#298)
Browse files Browse the repository at this point in the history
Removed an explicit `init_as_actual` in `strings_to_categoricals` in favour the implicit one from modifying the dataframe.

Split the raw r/w test into two (actualized AnnData and view), and check if the warning still occurs in the view one.

Co-authored-by: Philipp A. <[email protected]>
  • Loading branch information
ivirshup and flying-sheep committed Jan 22, 2020
1 parent 19b62fe commit 52d01f3
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 18 deletions.
16 changes: 8 additions & 8 deletions anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,14 +1143,8 @@ def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
dont_modify = False # only necessary for backed views
if df is None:
dfs = [self.obs, self.var]
if self.is_view:
if not self.isbacked:
warnings.warn(
"Initializing view as actual.", ImplicitModificationWarning,
)
self._init_as_actual(self.copy())
else:
dont_modify = True
if self.is_view and self.isbacked:
dont_modify = True
else:
dfs = [df]
for df in dfs:
Expand All @@ -1173,6 +1167,12 @@ def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
"AnnData, not on this view. You might encounter this"
"error message while copying or writing to disk."
)
if self.is_view:
warnings.warn(
"Initializing view as actual.", ImplicitModificationWarning
)
# If `self` is a view, it will be actualized in the next line,
# therefore the previous warning
df[key] = c
logger.info(f"... storing {key!r} as categorical")

Expand Down
44 changes: 34 additions & 10 deletions anndata/tests/test_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import anndata as ad
from anndata._core.anndata import ImplicitModificationWarning
from anndata.tests.helpers import assert_equal


# -------------------------------------------------------------------------------
Expand Down Expand Up @@ -37,7 +38,9 @@ def adata_raw():
np.array(data), obs=obs_dict, var=var_dict, uns=uns_dict, dtype="int32"
)
adata.raw = adata
return adata[:, [0, 1]]
# Make them different shapes
adata = adata[:, [0, 1]].copy()
return adata


# -------------------------------------------------------------------------------
Expand Down Expand Up @@ -65,22 +68,43 @@ def test_raw_of_view(adata_raw):


def test_raw_rw(adata_raw, backing_h5ad):
with pytest.warns(
ImplicitModificationWarning, match="Initializing view as actual"
): # TODO: don’t modify adata just to write it
adata_raw.write(backing_h5ad)
adata_raw = ad.read(backing_h5ad)
adata_raw.write(backing_h5ad)
adata_read = ad.read(backing_h5ad)

assert_equal(adata_read, adata_raw, exact=True)

assert adata_raw.var_names.tolist() == ["var1", "var2"]
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]]


def test_raw_view_rw(adata_raw, backing_h5ad):
# Make sure it still writes correctly if the object is a view
adata_raw_view = adata_raw[:, adata_raw.var_names]
assert_equal(adata_raw_view, adata_raw)
with pytest.warns(ImplicitModificationWarning, match="Initializing view as actual"):
adata_raw_view.write(backing_h5ad)
adata_read = ad.read(backing_h5ad)

assert_equal(adata_read, adata_raw_view, exact=True)

assert adata_raw.var_names.tolist() == ["var1", "var2"]
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]]


def test_raw_backed(adata_raw, backing_h5ad):
with pytest.warns(
ImplicitModificationWarning, match="Initializing view as actual"
): # TODO: don’t modify adata just to write it
adata_raw.filename = backing_h5ad
adata_raw.filename = backing_h5ad

assert adata_raw.var_names.tolist() == ["var1", "var2"]
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
if adata_raw.raw[:, 0].X.shape[1] != 1:
pytest.xfail("Raw is broken for backed slices")
assert adata_raw.raw[:, 0].X[:].tolist() == [[1], [4], [7]]


def test_raw_view_backed(adata_raw, backing_h5ad):
adata_raw.filename = backing_h5ad

assert adata_raw.var_names.tolist() == ["var1", "var2"]
assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
Expand Down

0 comments on commit 52d01f3

Please sign in to comment.