Skip to content

CLN: remove and update for outdated _item_cache #61789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin):

_internal_names: list[str] = [
"_mgr",
"_item_cache",
"_cache",
"_name",
"_metadata",
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1898,10 +1898,6 @@ def _consolidate_check(self) -> None:
self._known_consolidated = True

def _consolidate_inplace(self) -> None:
# In general, _consolidate_inplace should only be called via
# DataFrame._consolidate_inplace, otherwise we will fail to invalidate
# the DataFrame's _item_cache. The exception is for newly-created
# BlockManager objects not yet attached to a DataFrame.
if not self.is_consolidated():
self.blocks = _consolidate(self.blocks)
self._is_consolidated = True
Expand Down
15 changes: 0 additions & 15 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

from pandas import (
DataFrame,
Index,
Expand Down Expand Up @@ -72,19 +70,6 @@ def test_insert_with_columns_dups(self):
)
tm.assert_frame_equal(df, exp)

def test_insert_item_cache(self, performance_warning):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
ser = df[0]
expected_warning = PerformanceWarning if performance_warning else None

with tm.assert_produces_warning(expected_warning):
for n in range(100):
df[n + 3] = df[1] * n

ser.iloc[0] = 99
assert df.iloc[0, 0] == df[0][0]
assert df.iloc[0, 0] != 99

def test_insert_EA_no_warning(self):
# PerformanceWarning about fragmented frame should not be raised when
# using EAs (https://github.com/pandas-dev/pandas/issues/44098)
Expand Down
14 changes: 0 additions & 14 deletions pandas/tests/frame/methods/test_cov_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,20 +207,6 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
tm.assert_frame_equal(result, expected)

def test_corr_item_cache(self):
# Check that corr does not lead to incorrect entries in item_cache

df = DataFrame({"A": range(10)})
df["B"] = range(10)[::-1]

ser = df["A"] # populate item_cache
assert len(df._mgr.blocks) == 2

_ = df.corr(numeric_only=True)

ser.iloc[0] = 99
assert df.loc[0, "A"] == 0

@pytest.mark.parametrize("length", [2, 20, 200, 2000])
def test_corr_for_constant_columns(self, length):
# GH: 37448
Expand Down
16 changes: 0 additions & 16 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,22 +721,6 @@ def test_quantile_empty_no_columns(self, interp_method):
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)

def test_quantile_item_cache(self, interp_method):
# previous behavior incorrectly retained an invalid _item_cache entry
interpolation, method = interp_method
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
)
df["D"] = df["A"] * 2
ser = df["A"]
assert len(df._mgr.blocks) == 2

df.quantile(numeric_only=False, interpolation=interpolation, method=method)

ser.iloc[0] = 99
assert df.iloc[0, 0] == df["A"][0]
assert df.iloc[0, 0] != 99

def test_invalid_method(self):
with pytest.raises(ValueError, match="Invalid method: foo"):
DataFrame(range(1)).quantile(0.5, method="foo")
Expand Down
15 changes: 0 additions & 15 deletions pandas/tests/frame/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,21 +592,6 @@ def test_sort_values_nat_na_position_default(self):
result = expected.sort_values(["A", "date"])
tm.assert_frame_equal(result, expected)

def test_sort_values_item_cache(self):
# previous behavior incorrectly retained an invalid _item_cache entry
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
)
df["D"] = df["A"] * 2
ser = df["A"]
assert len(df._mgr.blocks) == 2

df.sort_values(by="A")

ser.iloc[0] = 99
assert df.iloc[0, 0] == df["A"][0]
assert df.iloc[0, 0] != 99

def test_sort_values_reshaping(self):
# GH 39426
values = list(range(21))
Expand Down
22 changes: 0 additions & 22 deletions pandas/tests/frame/methods/test_to_dict_of_blocks.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
MultiIndex,
)
import pandas._testing as tm
from pandas.core.arrays import NumpyExtensionArray


class TestToDictOfBlocks:
Expand All @@ -27,22 +21,6 @@ def test_no_copy_blocks(self, float_frame):
assert _last_df is not None and not _last_df[column].equals(df[column])


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_to_dict_of_blocks_item_cache():
# Calling to_dict_of_blocks should not poison item_cache
df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object))
mgr = df._mgr
assert len(mgr.blocks) == 3 # i.e. not consolidated

ser = df["b"] # populates item_cache["b"]

df._to_dict_of_blocks()

with pytest.raises(ValueError, match="read-only"):
ser.values[0] = "foo"


def test_set_change_dtype_slice():
# GH#8850
cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")])
Expand Down
27 changes: 0 additions & 27 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,30 +381,3 @@ def test_update_inplace_sets_valid_block_values():

# check we haven't put a Series into any block.values
assert isinstance(df._mgr.blocks[0].values, Categorical)


def test_nonconsolidated_item_cache_take():
# https://github.com/pandas-dev/pandas/issues/35521

# create non-consolidated dataframe with object dtype columns
df = DataFrame(
{
"col1": Series(["a"], dtype=object),
}
)
df["col2"] = Series([0], dtype=object)
assert not df._mgr.is_consolidated()

# access column (item cache)
df["col1"] == "A"
# take operation
# (regression was that this consolidated but didn't reset item cache,
# resulting in an invalid cache and the .at operation not working properly)
df[df["col2"] == 0]

# now setting value should update actual dataframe
df.at[0, "col1"] = "A"

expected = DataFrame({"col1": ["A"], "col2": [0]}, dtype=object)
tm.assert_frame_equal(df, expected)
assert df.at[0, "col1"] == "A"
23 changes: 0 additions & 23 deletions pandas/tests/indexing/test_at.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,6 @@ def test_selection_methods_of_assigned_col():


class TestAtSetItem:
def test_at_setitem_item_cache_cleared(self):
# GH#22372 Note the multi-step construction is necessary to trigger
# the original bug. pandas/issues/22372#issuecomment-413345309
df = DataFrame(index=[0])
df["x"] = 1
df["cost"] = 2

# accessing df["cost"] adds "cost" to the _item_cache
df["cost"]

# This loc[[0]] lookup used to call _consolidate_inplace at the
# BlockManager level, which failed to clear the _item_cache
df.loc[[0]]

df.at[0, "x"] = 4
df.at[0, "cost"] = 789

expected = DataFrame({"x": [4], "cost": 789}, index=[0])
tm.assert_frame_equal(df, expected)

# And in particular, check that the _item_cache has updated correctly.
tm.assert_series_equal(df["cost"], expected["cost"])

def test_at_setitem_mixed_index_assignment(self):
# GH#19860
ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
Expand Down
29 changes: 0 additions & 29 deletions pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,6 @@


class TestCaching:
def test_slice_consolidate_invalidate_item_cache(self):
# this is chained assignment, but will 'work'
with option_context("chained_assignment", None):
# #3970
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})

# Creates a second float block
df["cc"] = 0.0

# caches a reference to the 'bb' series
df["bb"]

# Assignment to wrong series
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.17
tm.assert_almost_equal(df["bb"][0], 2.2)

@pytest.mark.parametrize("do_ref", [True, False])
def test_setitem_cache_updating(self, do_ref):
# GH 5424
Expand Down Expand Up @@ -89,18 +72,6 @@ def test_setitem_cache_updating_slices(self):
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])

def test_altering_series_clears_parent_cache(self):
# GH #33675
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
ser = df["A"]

# Adding a new entry to ser swaps in a new array, so "A" needs to
# be removed from df._item_cache
ser["c"] = 5
assert len(ser) == 3
assert df["A"] is not ser
assert len(df["A"]) == 2


class TestChaining:
def test_setitem_chained_setfault(self):
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,8 +735,6 @@ def test_reindex_items(self):
mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")

reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
# reindex_axis does not consolidate_inplace, as that risks failing to
# invalidate _item_cache
assert not reindexed.is_consolidated()

tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"]))
Expand Down
Loading