"""
Also test support for datetime64[ns] in Series / DataFrame
"""
from datetime import (
    datetime,
    timedelta,
)
import re

from dateutil.tz import (
    gettz,
    tzutc,
)
import numpy as np
import pytest
import pytz

from pandas._libs import index as libindex

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
    period_range,
)
import pandas._testing as tm


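# "WOM-1FRI" in the two tests below is the week-of-month offset alias for the
# first Friday of each month, so the index holds one entry per month and
# position 48 corresponds to 2009-01-02.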
def test_fancy_getitem():
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )

    s = Series(np.arange(len(dti)), index=dti)

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert s[48] == 48
    assert s["1/2/2009"] == 48
    assert s["2009-1-2"] == 48
    assert s[datetime(2009, 1, 2)] == 48
    assert s[Timestamp(datetime(2009, 1, 2))] == 48
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        s["2009-1-3"]
    tm.assert_series_equal(
        s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    )


def test_fancy_setitem():
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )

    s = Series(np.arange(len(dti)), index=dti)

    msg = "Series.__setitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        s[48] = -1
    assert s.iloc[48] == -1
    s["1/2/2009"] = -2
    assert s.iloc[48] == -2
    s["1/2/2009":"2009-06-05"] = -3
    assert (s[48:54] == -3).all()


@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    if tz_source == "pytz":
        tzget = pytz.timezone
    else:
        # handle special case for utc in dateutil
        tzget = lambda x: tzutc() if x == "UTC" else gettz(x)

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz=tzget("US/Eastern"))
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts.copy()
    dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    dt = dt.to_pydatetime()
    result[dt] = 0
    result[dt] = ts.iloc[4]
    tm.assert_series_equal(result, ts)


def test_getitem_setitem_datetimeindex():
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz="US/Eastern")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    result = ts["1990-01-01 04:00:00"]
    expected = ts.iloc[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # But we do not give datetimes a pass on tzawareness compat
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    naive = datetime(1990, 1, 1, 4)
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with pytest.raises(KeyError, match=re.escape(repr(key))):
            # GH#36148 as of 2.0 we require tzawareness-compat
            ts[key]

    result = ts.copy()
    # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
    # so setting it as a new key casts to object rather than matching
    # rng[4]
    result[naive] = ts.iloc[4]
    assert result.index.dtype == object
    tm.assert_index_equal(result.index[:-1], rng.astype(object))
    assert result.index[-1] == naive

    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        ts[naive : datetime(1990, 1, 1, 7)]

    result = ts.copy()
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 0
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 99
    # the __setitem__ calls above failed, so result should still match ts
    tm.assert_series_equal(result, ts)

    lb = naive
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    lb = Timestamp(naive).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(result, ts)


def test_getitem_setitem_periodindex():
    N = 50
    rng = period_range("1/1/1990", periods=N, freq="h")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    result = ts["1990-01-01 04"]
    expected = ts.iloc[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04"] = 0
    result["1990-01-01 04"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)

    result = ts["1990-01-01 04":"1990-01-01 07"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04":"1990-01-01 07"] = 0
    result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # GH 2782
    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)


def test_datetime_indexing():
    index = date_range("1/1/2000", "1/7/2000")
    index = index.repeat(3)

    s = Series(len(index), index=index)
    stamp = Timestamp("1/8/2000")

    with pytest.raises(KeyError, match=re.escape(repr(stamp))):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0

    # not monotonic
    s = Series(len(index), index=index)
    s = s[::-1]

    with pytest.raises(KeyError, match=re.escape(repr(stamp))):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0


# test duplicates in time series


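# rand_series_with_duplicate_datetimeindex is a shared fixture defined elsewhere
# in the test suite (conftest), not in this module.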
def test_indexing_with_duplicate_datetimeindex(
    rand_series_with_duplicate_datetimeindex,
):
    ts = rand_series_with_duplicate_datetimeindex

    uniques = ts.index.unique()
    for date in uniques:
        result = ts[date]

        mask = ts.index == date
        total = (ts.index == date).sum()
        expected = ts[mask]
        if total > 1:
            tm.assert_series_equal(result, expected)
        else:
            tm.assert_almost_equal(result, expected.iloc[0])

        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        tm.assert_series_equal(cp, expected)

    key = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=re.escape(repr(key))):
        ts[key]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0


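# The two size-cutoff tests below monkeypatch libindex._SIZE_CUTOFF down to 1000
# so that an index of ~1100+ entries already counts as over the size threshold
# (see the over_size_threshold assertion in the PeriodIndex test) without having
# to build a huge fixture.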
def test_loc_getitem_over_size_cutoff(monkeypatch):
    # #1821

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    # create a large list of non-periodic datetimes
    dates = []
    sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    d = datetime(2011, 12, 5, 20, 30)
    n = 1100
    for i in range(n):
        dates.append(d)
        dates.append(d + sec)
        dates.append(d + sec + half_sec)
        dates.append(d + sec + sec + half_sec)
        d += 3 * sec

    # duplicate some values in the list
    duplicate_positions = np.random.default_rng(2).integers(0, len(dates) - 1, 20)
    for p in duplicate_positions:
        dates[p + 1] = dates[p]

    df = DataFrame(
        np.random.default_rng(2).standard_normal((len(dates), 4)),
        index=dates,
        columns=list("ABCD"),
    )

    pos = n * 3
    timestamp = df.index[pos]
    assert timestamp in df.index

    # it works!
    df.loc[timestamp]
    assert len(df.loc[[timestamp]]) > 0


def test_indexing_over_size_cutoff_period_index(monkeypatch):
    # GH 27136

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    n = 1100
    idx = period_range("1/1/2000", freq="min", periods=n)
    assert idx._engine.over_size_threshold

    s = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)

    pos = n - 1
    timestamp = idx[pos]
    assert timestamp in s.index

    # it works!
    s[timestamp]
    assert len(s.loc[[timestamp]]) > 0


def test_indexing_unordered():
    # GH 2437
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.default_rng(2).random(len(rng)), index=rng)
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    for t in ts.index:
        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    for key in [
        slice("2011-01-01", "2011-01-15"),
        slice("2010-12-30", "2011-01-15"),
        slice("2011-01-01", "2011-01-16"),
        # partial ranges
        slice("2011-01-01", "2011-01-6"),
        slice("2011-01-06", "2011-01-8"),
        slice("2011-01-06", "2011-01-12"),
    ]:
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            compare(key)

    # single values
    result = ts2["2011"].sort_index()
    expected = ts["2011"]
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)


def test_indexing_unordered2():
    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME")
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.default_rng(2).permutation(20))

    result = ts["2005"]
    for t in result.index:
        assert t.year == 2005


def test_indexing():
    idx = date_range("2001-1-1", periods=20, freq="ME")
    ts = Series(np.random.default_rng(2).random(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    result = ts["2001"]
    tm.assert_series_equal(result, ts.iloc[:12])

    df = DataFrame({"A": ts.copy()})

    # GH#36179 pre-2.0, df["2001"] operated as slicing on rows; in 2.0 it behaves
    # like any other key, so it raises
    with pytest.raises(KeyError, match="2001"):
        df["2001"]

    # setting
    ts = Series(np.random.default_rng(2).random(len(idx)), index=idx)
    expected = ts.copy()
    expected.iloc[:12] = 1
    ts["2001"] = 1
    tm.assert_series_equal(ts, expected)

    expected = df.copy()
    expected.iloc[:12, 0] = 1
    df.loc["2001", "A"] = 1
    tm.assert_frame_equal(df, expected)


def test_getitem_str_month_with_datetimeindex():
    # GH3546 (not including times on the last day)
    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="h")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)

    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="s")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)


def test_getitem_str_year_with_datetimeindex():
    idx = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013"]
    tm.assert_series_equal(expected, ts)


def test_getitem_str_second_with_datetimeindex():
    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.default_rng(2).random((5, 5)),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
    )

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]

    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]


def test_compare_datetime_with_all_none():
    # GH#54870
    ser = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    ser2 = Series([None, None])
    result = ser > ser2
    expected = Series([False, False])
    tm.assert_series_equal(result, expected)