You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
157 lines
4.7 KiB
157 lines
4.7 KiB
import numpy as np
|
|
|
|
from pandas import (
|
|
DataFrame,
|
|
Index,
|
|
MultiIndex,
|
|
RangeIndex,
|
|
Series,
|
|
)
|
|
import pandas._testing as tm
|
|
from pandas.tests.copy_view.util import get_array
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Copy/view behaviour for the values that are set in a DataFrame
|
|
|
|
|
|
def test_set_column_with_array():
    """Setting a NumPy array as a new column (``df[col] = arr``) copies the data."""
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    arr = np.array([1, 2, 3], dtype="int64")

    df["c"] = arr

    # the array data is copied
    assert not np.shares_memory(get_array(df, "c"), arr)
    # and thus modifying the array does not modify the DataFrame
    arr[0] = 0
    tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
|
|
|
|
|
|
def test_set_column_with_series(using_copy_on_write):
|
|
# Case: setting a series as a new column (df[col] = s) copies that data
|
|
# (with delayed copy with CoW)
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
ser = Series([1, 2, 3])
|
|
|
|
df["c"] = ser
|
|
|
|
if using_copy_on_write:
|
|
assert np.shares_memory(get_array(df, "c"), get_array(ser))
|
|
else:
|
|
# the series data is copied
|
|
assert not np.shares_memory(get_array(df, "c"), get_array(ser))
|
|
|
|
# and modifying the series does not modify the DataFrame
|
|
ser.iloc[0] = 0
|
|
assert ser.iloc[0] == 0
|
|
tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
|
|
|
|
|
|
def test_set_column_with_index(using_copy_on_write):
|
|
# Case: setting an index as a new column (df[col] = idx) copies that data
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
idx = Index([1, 2, 3])
|
|
|
|
df["c"] = idx
|
|
|
|
# the index data is copied
|
|
assert not np.shares_memory(get_array(df, "c"), idx.values)
|
|
|
|
idx = RangeIndex(1, 4)
|
|
arr = idx.values
|
|
|
|
df["d"] = idx
|
|
|
|
assert not np.shares_memory(get_array(df, "d"), arr)
|
|
|
|
|
|
def test_set_columns_with_dataframe(using_copy_on_write):
|
|
# Case: setting a DataFrame as new columns copies that data
|
|
# (with delayed copy with CoW)
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})
|
|
|
|
df[["c", "d"]] = df2
|
|
|
|
if using_copy_on_write:
|
|
assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
|
|
else:
|
|
# the data is copied
|
|
assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
|
|
|
|
# and modifying the set DataFrame does not modify the original DataFrame
|
|
df2.iloc[0, 0] = 0
|
|
tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))
|
|
|
|
|
|
def test_setitem_series_no_copy(using_copy_on_write):
|
|
# Case: setting a Series as column into a DataFrame can delay copying that data
|
|
df = DataFrame({"a": [1, 2, 3]})
|
|
rhs = Series([4, 5, 6])
|
|
rhs_orig = rhs.copy()
|
|
|
|
# adding a new column
|
|
df["b"] = rhs
|
|
if using_copy_on_write:
|
|
assert np.shares_memory(get_array(rhs), get_array(df, "b"))
|
|
|
|
df.iloc[0, 1] = 100
|
|
tm.assert_series_equal(rhs, rhs_orig)
|
|
|
|
|
|
def test_setitem_series_no_copy_single_block(using_copy_on_write):
|
|
# Overwriting an existing column that is a single block
|
|
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
|
|
rhs = Series([4, 5, 6])
|
|
rhs_orig = rhs.copy()
|
|
|
|
df["a"] = rhs
|
|
if using_copy_on_write:
|
|
assert np.shares_memory(get_array(rhs), get_array(df, "a"))
|
|
|
|
df.iloc[0, 0] = 100
|
|
tm.assert_series_equal(rhs, rhs_orig)
|
|
|
|
|
|
def test_setitem_series_no_copy_split_block(using_copy_on_write):
|
|
# Overwriting an existing column that is part of a larger block
|
|
df = DataFrame({"a": [1, 2, 3], "b": 1})
|
|
rhs = Series([4, 5, 6])
|
|
rhs_orig = rhs.copy()
|
|
|
|
df["b"] = rhs
|
|
if using_copy_on_write:
|
|
assert np.shares_memory(get_array(rhs), get_array(df, "b"))
|
|
|
|
df.iloc[0, 1] = 100
|
|
tm.assert_series_equal(rhs, rhs_orig)
|
|
|
|
|
|
def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
|
|
# Setting a Series to multiple columns will repeat the data
|
|
# (currently copying the data eagerly)
|
|
df = DataFrame(
|
|
[[1, 2, 3], [3, 4, 5]],
|
|
columns=MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]]),
|
|
)
|
|
rhs = Series([10, 11])
|
|
df["a"] = rhs
|
|
assert not np.shares_memory(get_array(rhs), df._get_column_array(0))
|
|
if using_copy_on_write:
|
|
assert df._mgr._has_no_reference(0)
|
|
|
|
|
|
def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write):
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
|
|
# this should not raise any warning
|
|
with tm.assert_produces_warning(None):
|
|
df["a"] += 1
|
|
|
|
# when it is not in a chain, then it should produce a warning
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
ser = df["a"]
|
|
with tm.assert_cow_warning(warn_copy_on_write):
|
|
ser += 1
|