You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

205 lines
6.7 KiB

import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
Series,
date_range,
)
import pandas._testing as tm
class TestDataFrameUpdate:
def test_update_nan(self):
# #15593 #15617
# test 1
df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)})
df2 = DataFrame({"A": [None, 2, 3]})
expected = df1.copy()
df1.update(df2, overwrite=False)
tm.assert_frame_equal(df1, expected)
# test 2
df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)})
df2 = DataFrame({"A": [None, 2, 3]})
expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)})
df1.update(df2, overwrite=False)
tm.assert_frame_equal(df1, expected)
def test_update(self):
df = DataFrame(
[[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
)
other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3])
df.update(other)
expected = DataFrame(
[[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]]
)
tm.assert_frame_equal(df, expected)
def test_update_dtypes(self):
# gh 3016
df = DataFrame(
[[1.0, 2.0, 1, False, True], [4.0, 5.0, 2, True, False]],
columns=["A", "B", "int", "bool1", "bool2"],
)
other = DataFrame(
[[45, 45, 3, True]], index=[0], columns=["A", "B", "int", "bool1"]
)
df.update(other)
expected = DataFrame(
[[45.0, 45.0, 3, True, True], [4.0, 5.0, 2, True, False]],
columns=["A", "B", "int", "bool1", "bool2"],
)
tm.assert_frame_equal(df, expected)
def test_update_nooverwrite(self):
df = DataFrame(
[[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
)
other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3])
df.update(other, overwrite=False)
expected = DataFrame(
[[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]]
)
tm.assert_frame_equal(df, expected)
def test_update_filtered(self):
df = DataFrame(
[[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
)
other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3])
df.update(other, filter_func=lambda x: x > 2)
expected = DataFrame(
[[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]]
)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"bad_kwarg, exception, msg",
[
# errors must be 'ignore' or 'raise'
({"errors": "something"}, ValueError, "The parameter errors must.*"),
({"join": "inner"}, NotImplementedError, "Only left join is supported"),
],
)
def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg):
df = DataFrame([[1.5, 1, 3.0]])
with pytest.raises(exception, match=msg):
df.update(df, **bad_kwarg)
def test_update_raise_on_overlap(self):
df = DataFrame(
[[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
)
other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2])
with pytest.raises(ValueError, match="Data overlaps"):
df.update(other, errors="raise")
def test_update_from_non_df(self):
d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])}
df = DataFrame(d)
d["a"] = Series([5, 6, 7, 8])
df.update(d)
expected = DataFrame(d)
tm.assert_frame_equal(df, expected)
d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
df = DataFrame(d)
d["a"] = [5, 6, 7, 8]
df.update(d)
expected = DataFrame(d)
tm.assert_frame_equal(df, expected)
def test_update_datetime_tz(self):
# GH 25807
result = DataFrame([pd.Timestamp("2019", tz="UTC")])
with tm.assert_produces_warning(None):
result.update(result)
expected = DataFrame([pd.Timestamp("2019", tz="UTC")])
tm.assert_frame_equal(result, expected)
def test_update_datetime_tz_in_place(self, using_copy_on_write, warn_copy_on_write):
# https://github.com/pandas-dev/pandas/issues/56227
result = DataFrame([pd.Timestamp("2019", tz="UTC")])
orig = result.copy()
view = result[:]
with tm.assert_produces_warning(
FutureWarning if warn_copy_on_write else None, match="Setting a value"
):
result.update(result + pd.Timedelta(days=1))
expected = DataFrame([pd.Timestamp("2019-01-02", tz="UTC")])
tm.assert_frame_equal(result, expected)
if not using_copy_on_write:
tm.assert_frame_equal(view, expected)
else:
tm.assert_frame_equal(view, orig)
def test_update_with_different_dtype(self, using_copy_on_write):
# GH#3217
df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
df["c"] = np.nan
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.update({"c": Series(["foo"], index=[0])})
expected = DataFrame(
{
"a": [1, 3],
"b": [np.nan, 2],
"c": Series(["foo", np.nan], dtype="object"),
}
)
tm.assert_frame_equal(df, expected)
@td.skip_array_manager_invalid_test
def test_update_modify_view(
self, using_copy_on_write, warn_copy_on_write, using_infer_string
):
# GH#47188
df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]})
df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]})
df2_orig = df2.copy()
result_view = df2[:]
# TODO(CoW-warn) better warning message
with tm.assert_cow_warning(warn_copy_on_write):
df2.update(df)
expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]})
tm.assert_frame_equal(df2, expected)
if using_copy_on_write or using_infer_string:
tm.assert_frame_equal(result_view, df2_orig)
else:
tm.assert_frame_equal(result_view, expected)
def test_update_dt_column_with_NaT_create_column(self):
# GH#16713
df = DataFrame({"A": [1, None], "B": [pd.NaT, pd.to_datetime("2016-01-01")]})
df2 = DataFrame({"A": [2, 3]})
df.update(df2, overwrite=False)
expected = DataFrame(
{"A": [1.0, 3.0], "B": [pd.NaT, pd.to_datetime("2016-01-01")]}
)
tm.assert_frame_equal(df, expected)