You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

147 lines
5.1 KiB

from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
isna,
notna,
)
import pandas._testing as tm
class TestSeriesClip:
def test_clip(self, datetime_series):
val = datetime_series.median()
assert datetime_series.clip(lower=val).min() == val
assert datetime_series.clip(upper=val).max() == val
result = datetime_series.clip(-0.5, 0.5)
expected = np.clip(datetime_series, -0.5, 0.5)
tm.assert_series_equal(result, expected)
assert isinstance(expected, Series)
def test_clip_types_and_nulls(self):
sers = [
Series([np.nan, 1.0, 2.0, 3.0]),
Series([None, "a", "b", "c"]),
Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
]
for s in sers:
thresh = s[2]
lower = s.clip(lower=thresh)
upper = s.clip(upper=thresh)
assert lower[notna(lower)].min() == thresh
assert upper[notna(upper)].max() == thresh
assert list(isna(s)) == list(isna(lower))
assert list(isna(s)) == list(isna(upper))
def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
# Ensure that clipping method can handle NA values with out failing
# GH#40581
if nulls_fixture is pd.NaT:
# constructor will raise, see
# test_constructor_mismatched_null_nullable_dtype
pytest.skip("See test_constructor_mismatched_null_nullable_dtype")
ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
s_clipped_upper = ser.clip(upper=2.0)
s_clipped_lower = ser.clip(lower=2.0)
expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)
tm.assert_series_equal(s_clipped_upper, expected_upper)
tm.assert_series_equal(s_clipped_lower, expected_lower)
def test_clip_with_na_args(self):
"""Should process np.nan argument as None"""
# GH#17276
s = Series([1, 2, 3])
tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3]))
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
# GH#19992
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
# TODO: avoid this warning here? seems like we should never be upcasting
# in the first place?
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(lower=[0, 4, np.nan])
tm.assert_series_equal(res, Series([1, 4, 3]))
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(upper=[1, np.nan, 1])
tm.assert_series_equal(res, Series([1, 2, 1]))
# GH#40420
s = Series([1, 2, 3])
result = s.clip(0, [np.nan, np.nan, np.nan])
tm.assert_series_equal(s, result)
def test_clip_against_series(self):
# GH#6966
s = Series([1.0, 1.0, 4.0])
lower = Series([1.0, 2.0, 3.0])
upper = Series([1.5, 2.5, 3.5])
tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
def test_clip_against_list_like(self, inplace, upper):
# GH#15390
original = Series([5, 6, 7])
result = original.clip(upper=upper, inplace=inplace)
expected = Series([1, 2, 3])
if inplace:
result = original
tm.assert_series_equal(result, expected, check_exact=True)
def test_clip_with_datetimes(self):
# GH#11838
# naive and tz-aware datetimes
t = Timestamp("2015-12-01 09:30:30")
s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")])
result = s.clip(upper=t)
expected = Series(
[Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")]
)
tm.assert_series_equal(result, expected)
t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern")
s = Series(
[
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
Timestamp("2015-12-01 09:31:00", tz="US/Eastern"),
]
)
result = s.clip(upper=t)
expected = Series(
[
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
Timestamp("2015-12-01 09:30:30", tz="US/Eastern"),
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", [object, "M8[us]"])
def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
# GH-42794
ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)
result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)
tm.assert_series_equal(result, expected)