You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
559 lines
15 KiB
559 lines
15 KiB
from datetime import (
|
|
datetime,
|
|
timedelta,
|
|
)
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from pandas._libs.tslibs.ccalendar import (
|
|
DAYS,
|
|
MONTHS,
|
|
)
|
|
from pandas._libs.tslibs.offsets import _get_offset
|
|
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
|
|
from pandas.compat import is_platform_windows
|
|
|
|
from pandas import (
|
|
DatetimeIndex,
|
|
Index,
|
|
RangeIndex,
|
|
Series,
|
|
Timestamp,
|
|
date_range,
|
|
period_range,
|
|
)
|
|
import pandas._testing as tm
|
|
from pandas.core.arrays import (
|
|
DatetimeArray,
|
|
TimedeltaArray,
|
|
)
|
|
from pandas.core.tools.datetimes import to_datetime
|
|
|
|
from pandas.tseries import (
|
|
frequencies,
|
|
offsets,
|
|
)
|
|
|
|
|
|
@pytest.fixture(
|
|
params=[
|
|
(timedelta(1), "D"),
|
|
(timedelta(hours=1), "h"),
|
|
(timedelta(minutes=1), "min"),
|
|
(timedelta(seconds=1), "s"),
|
|
(np.timedelta64(1, "ns"), "ns"),
|
|
(timedelta(microseconds=1), "us"),
|
|
(timedelta(microseconds=1000), "ms"),
|
|
]
|
|
)
|
|
def base_delta_code_pair(request):
|
|
return request.param
|
|
|
|
|
|
freqs = (
|
|
[f"QE-{month}" for month in MONTHS]
|
|
+ [f"{annual}-{month}" for annual in ["YE", "BYE"] for month in MONTHS]
|
|
+ ["ME", "BME", "BMS"]
|
|
+ [f"WOM-{count}{day}" for count in range(1, 5) for day in DAYS]
|
|
+ [f"W-{day}" for day in DAYS]
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize("freq", freqs)
|
|
@pytest.mark.parametrize("periods", [5, 7])
|
|
def test_infer_freq_range(periods, freq):
|
|
freq = freq.upper()
|
|
|
|
gen = date_range("1/1/2000", periods=periods, freq=freq)
|
|
index = DatetimeIndex(gen.values)
|
|
|
|
if not freq.startswith("QE-"):
|
|
assert frequencies.infer_freq(index) == gen.freqstr
|
|
else:
|
|
inf_freq = frequencies.infer_freq(index)
|
|
is_dec_range = inf_freq == "QE-DEC" and gen.freqstr in (
|
|
"QE",
|
|
"QE-DEC",
|
|
"QE-SEP",
|
|
"QE-JUN",
|
|
"QE-MAR",
|
|
)
|
|
is_nov_range = inf_freq == "QE-NOV" and gen.freqstr in (
|
|
"QE-NOV",
|
|
"QE-AUG",
|
|
"QE-MAY",
|
|
"QE-FEB",
|
|
)
|
|
is_oct_range = inf_freq == "QE-OCT" and gen.freqstr in (
|
|
"QE-OCT",
|
|
"QE-JUL",
|
|
"QE-APR",
|
|
"QE-JAN",
|
|
)
|
|
assert is_dec_range or is_nov_range or is_oct_range
|
|
|
|
|
|
def test_raise_if_period_index():
|
|
index = period_range(start="1/1/1990", periods=20, freq="M")
|
|
msg = "Check the `freq` attribute instead of using infer_freq"
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
frequencies.infer_freq(index)
|
|
|
|
|
|
def test_raise_if_too_few():
|
|
index = DatetimeIndex(["12/31/1998", "1/3/1999"])
|
|
msg = "Need at least 3 dates to infer frequency"
|
|
|
|
with pytest.raises(ValueError, match=msg):
|
|
frequencies.infer_freq(index)
|
|
|
|
|
|
def test_business_daily():
|
|
index = DatetimeIndex(["01/01/1999", "1/4/1999", "1/5/1999"])
|
|
assert frequencies.infer_freq(index) == "B"
|
|
|
|
|
|
def test_business_daily_look_alike():
|
|
# see gh-16624
|
|
#
|
|
# Do not infer "B when "weekend" (2-day gap) in wrong place.
|
|
index = DatetimeIndex(["12/31/1998", "1/3/1999", "1/4/1999"])
|
|
assert frequencies.infer_freq(index) is None
|
|
|
|
|
|
def test_day_corner():
|
|
index = DatetimeIndex(["1/1/2000", "1/2/2000", "1/3/2000"])
|
|
assert frequencies.infer_freq(index) == "D"
|
|
|
|
|
|
def test_non_datetime_index():
|
|
dates = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
|
|
assert frequencies.infer_freq(dates) == "D"
|
|
|
|
|
|
def test_fifth_week_of_month_infer():
|
|
# see gh-9425
|
|
#
|
|
# Only attempt to infer up to WOM-4.
|
|
index = DatetimeIndex(["2014-03-31", "2014-06-30", "2015-03-30"])
|
|
assert frequencies.infer_freq(index) is None
|
|
|
|
|
|
def test_week_of_month_fake():
|
|
# All of these dates are on same day
|
|
# of week and are 4 or 5 weeks apart.
|
|
index = DatetimeIndex(["2013-08-27", "2013-10-01", "2013-10-29", "2013-11-26"])
|
|
assert frequencies.infer_freq(index) != "WOM-4TUE"
|
|
|
|
|
|
def test_fifth_week_of_month():
|
|
# see gh-9425
|
|
#
|
|
# Only supports freq up to WOM-4.
|
|
msg = (
|
|
"Of the four parameters: start, end, periods, "
|
|
"and freq, exactly three must be specified"
|
|
)
|
|
|
|
with pytest.raises(ValueError, match=msg):
|
|
date_range("2014-01-01", freq="WOM-5MON")
|
|
|
|
|
|
def test_monthly_ambiguous():
|
|
rng = DatetimeIndex(["1/31/2000", "2/29/2000", "3/31/2000"])
|
|
assert rng.inferred_freq == "ME"
|
|
|
|
|
|
def test_annual_ambiguous():
|
|
rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
|
|
assert rng.inferred_freq == "YE-JAN"
|
|
|
|
|
|
@pytest.mark.parametrize("count", range(1, 5))
|
|
def test_infer_freq_delta(base_delta_code_pair, count):
|
|
b = Timestamp(datetime.now())
|
|
base_delta, code = base_delta_code_pair
|
|
|
|
inc = base_delta * count
|
|
index = DatetimeIndex([b + inc * j for j in range(3)])
|
|
|
|
exp_freq = f"{count:d}{code}" if count > 1 else code
|
|
assert frequencies.infer_freq(index) == exp_freq
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"constructor",
|
|
[
|
|
lambda now, delta: DatetimeIndex(
|
|
[now + delta * 7] + [now + delta * j for j in range(3)]
|
|
),
|
|
lambda now, delta: DatetimeIndex(
|
|
[now + delta * j for j in range(3)] + [now + delta * 7]
|
|
),
|
|
],
|
|
)
|
|
def test_infer_freq_custom(base_delta_code_pair, constructor):
|
|
b = Timestamp(datetime.now())
|
|
base_delta, _ = base_delta_code_pair
|
|
|
|
index = constructor(b, base_delta)
|
|
assert frequencies.infer_freq(index) is None
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"freq,expected", [("Q", "QE-DEC"), ("Q-NOV", "QE-NOV"), ("Q-OCT", "QE-OCT")]
|
|
)
|
|
def test_infer_freq_index(freq, expected):
|
|
rng = period_range("1959Q2", "2009Q3", freq=freq)
|
|
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
|
|
rng = Index(rng.to_timestamp("D", how="e").astype(object))
|
|
|
|
assert rng.inferred_freq == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"expected,dates",
|
|
list(
|
|
{
|
|
"YS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"],
|
|
"QE-OCT": ["2009-01-31", "2009-04-30", "2009-07-31", "2009-10-31"],
|
|
"ME": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"],
|
|
"W-SAT": ["2010-12-25", "2011-01-01", "2011-01-08", "2011-01-15"],
|
|
"D": ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"],
|
|
"h": [
|
|
"2011-12-31 22:00",
|
|
"2011-12-31 23:00",
|
|
"2012-01-01 00:00",
|
|
"2012-01-01 01:00",
|
|
],
|
|
}.items()
|
|
),
|
|
)
|
|
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
|
|
def test_infer_freq_tz(tz_naive_fixture, expected, dates, unit):
|
|
# see gh-7310, GH#55609
|
|
tz = tz_naive_fixture
|
|
idx = DatetimeIndex(dates, tz=tz).as_unit(unit)
|
|
assert idx.inferred_freq == expected
|
|
|
|
|
|
def test_infer_freq_tz_series(tz_naive_fixture):
|
|
# infer_freq should work with both tz-naive and tz-aware series. See gh-52456
|
|
tz = tz_naive_fixture
|
|
idx = date_range("2021-01-01", "2021-01-04", tz=tz)
|
|
series = idx.to_series().reset_index(drop=True)
|
|
inferred_freq = frequencies.infer_freq(series)
|
|
assert inferred_freq == "D"
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"date_pair",
|
|
[
|
|
["2013-11-02", "2013-11-5"], # Fall DST
|
|
["2014-03-08", "2014-03-11"], # Spring DST
|
|
["2014-01-01", "2014-01-03"], # Regular Time
|
|
],
|
|
)
|
|
@pytest.mark.parametrize(
|
|
"freq",
|
|
["h", "3h", "10min", "3601s", "3600001ms", "3600000001us", "3600000000001ns"],
|
|
)
|
|
def test_infer_freq_tz_transition(tz_naive_fixture, date_pair, freq):
|
|
# see gh-8772
|
|
tz = tz_naive_fixture
|
|
idx = date_range(date_pair[0], date_pair[1], freq=freq, tz=tz)
|
|
assert idx.inferred_freq == freq
|
|
|
|
|
|
def test_infer_freq_tz_transition_custom():
|
|
index = date_range("2013-11-03", periods=5, freq="3h").tz_localize(
|
|
"America/Chicago"
|
|
)
|
|
assert index.inferred_freq is None
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"data,expected",
|
|
[
|
|
# Hourly freq in a day must result in "h"
|
|
(
|
|
[
|
|
"2014-07-01 09:00",
|
|
"2014-07-01 10:00",
|
|
"2014-07-01 11:00",
|
|
"2014-07-01 12:00",
|
|
"2014-07-01 13:00",
|
|
"2014-07-01 14:00",
|
|
],
|
|
"h",
|
|
),
|
|
(
|
|
[
|
|
"2014-07-01 09:00",
|
|
"2014-07-01 10:00",
|
|
"2014-07-01 11:00",
|
|
"2014-07-01 12:00",
|
|
"2014-07-01 13:00",
|
|
"2014-07-01 14:00",
|
|
"2014-07-01 15:00",
|
|
"2014-07-01 16:00",
|
|
"2014-07-02 09:00",
|
|
"2014-07-02 10:00",
|
|
"2014-07-02 11:00",
|
|
],
|
|
"bh",
|
|
),
|
|
(
|
|
[
|
|
"2014-07-04 09:00",
|
|
"2014-07-04 10:00",
|
|
"2014-07-04 11:00",
|
|
"2014-07-04 12:00",
|
|
"2014-07-04 13:00",
|
|
"2014-07-04 14:00",
|
|
"2014-07-04 15:00",
|
|
"2014-07-04 16:00",
|
|
"2014-07-07 09:00",
|
|
"2014-07-07 10:00",
|
|
"2014-07-07 11:00",
|
|
],
|
|
"bh",
|
|
),
|
|
(
|
|
[
|
|
"2014-07-04 09:00",
|
|
"2014-07-04 10:00",
|
|
"2014-07-04 11:00",
|
|
"2014-07-04 12:00",
|
|
"2014-07-04 13:00",
|
|
"2014-07-04 14:00",
|
|
"2014-07-04 15:00",
|
|
"2014-07-04 16:00",
|
|
"2014-07-07 09:00",
|
|
"2014-07-07 10:00",
|
|
"2014-07-07 11:00",
|
|
"2014-07-07 12:00",
|
|
"2014-07-07 13:00",
|
|
"2014-07-07 14:00",
|
|
"2014-07-07 15:00",
|
|
"2014-07-07 16:00",
|
|
"2014-07-08 09:00",
|
|
"2014-07-08 10:00",
|
|
"2014-07-08 11:00",
|
|
"2014-07-08 12:00",
|
|
"2014-07-08 13:00",
|
|
"2014-07-08 14:00",
|
|
"2014-07-08 15:00",
|
|
"2014-07-08 16:00",
|
|
],
|
|
"bh",
|
|
),
|
|
],
|
|
)
|
|
def test_infer_freq_business_hour(data, expected):
|
|
# see gh-7905
|
|
idx = DatetimeIndex(data)
|
|
assert idx.inferred_freq == expected
|
|
|
|
|
|
def test_not_monotonic():
|
|
rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
|
|
rng = rng[::-1]
|
|
|
|
assert rng.inferred_freq == "-1YE-JAN"
|
|
|
|
|
|
def test_non_datetime_index2():
|
|
rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
|
|
vals = rng.to_pydatetime()
|
|
|
|
result = frequencies.infer_freq(vals)
|
|
assert result == rng.inferred_freq
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"idx",
|
|
[
|
|
Index(np.arange(5), dtype=np.int64),
|
|
Index(np.arange(5), dtype=np.float64),
|
|
period_range("2020-01-01", periods=5),
|
|
RangeIndex(5),
|
|
],
|
|
)
|
|
def test_invalid_index_types(idx):
|
|
# see gh-48439
|
|
msg = "|".join(
|
|
[
|
|
"cannot infer freq from a non-convertible",
|
|
"Check the `freq` attribute instead of using infer_freq",
|
|
]
|
|
)
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
frequencies.infer_freq(idx)
|
|
|
|
|
|
@pytest.mark.skipif(is_platform_windows(), reason="see gh-10822: Windows issue")
|
|
def test_invalid_index_types_unicode():
|
|
# see gh-10822
|
|
#
|
|
# Odd error message on conversions to datetime for unicode.
|
|
msg = "Unknown datetime string format"
|
|
|
|
with pytest.raises(ValueError, match=msg):
|
|
frequencies.infer_freq(Index(["ZqgszYBfuL"]))
|
|
|
|
|
|
def test_string_datetime_like_compat():
|
|
# see gh-6463
|
|
data = ["2004-01", "2004-02", "2004-03", "2004-04"]
|
|
|
|
expected = frequencies.infer_freq(data)
|
|
result = frequencies.infer_freq(Index(data))
|
|
|
|
assert result == expected
|
|
|
|
|
|
def test_series():
|
|
# see gh-6407
|
|
s = Series(date_range("20130101", "20130110"))
|
|
inferred = frequencies.infer_freq(s)
|
|
assert inferred == "D"
|
|
|
|
|
|
@pytest.mark.parametrize("end", [10, 10.0])
|
|
def test_series_invalid_type(end):
|
|
# see gh-6407
|
|
msg = "cannot infer freq from a non-convertible dtype on a Series"
|
|
s = Series(np.arange(end))
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
frequencies.infer_freq(s)
|
|
|
|
|
|
def test_series_inconvertible_string(using_infer_string):
|
|
# see gh-6407
|
|
if using_infer_string:
|
|
msg = "cannot infer freq from"
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
frequencies.infer_freq(Series(["foo", "bar"]))
|
|
else:
|
|
msg = "Unknown datetime string format"
|
|
|
|
with pytest.raises(ValueError, match=msg):
|
|
frequencies.infer_freq(Series(["foo", "bar"]))
|
|
|
|
|
|
@pytest.mark.parametrize("freq", [None, "ms"])
|
|
def test_series_period_index(freq):
|
|
# see gh-6407
|
|
#
|
|
# Cannot infer on PeriodIndex
|
|
msg = "cannot infer freq from a non-convertible dtype on a Series"
|
|
s = Series(period_range("2013", periods=10, freq=freq))
|
|
|
|
with pytest.raises(TypeError, match=msg):
|
|
frequencies.infer_freq(s)
|
|
|
|
|
|
@pytest.mark.parametrize("freq", ["ME", "ms", "s"])
|
|
def test_series_datetime_index(freq):
|
|
s = Series(date_range("20130101", periods=10, freq=freq))
|
|
inferred = frequencies.infer_freq(s)
|
|
assert inferred == freq
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"offset_func",
|
|
[
|
|
_get_offset,
|
|
lambda freq: date_range("2011-01-01", periods=5, freq=freq),
|
|
],
|
|
)
|
|
@pytest.mark.parametrize(
|
|
"freq",
|
|
[
|
|
"WEEKDAY",
|
|
"EOM",
|
|
"W@MON",
|
|
"W@TUE",
|
|
"W@WED",
|
|
"W@THU",
|
|
"W@FRI",
|
|
"W@SAT",
|
|
"W@SUN",
|
|
"QE@JAN",
|
|
"QE@FEB",
|
|
"QE@MAR",
|
|
"YE@JAN",
|
|
"YE@FEB",
|
|
"YE@MAR",
|
|
"YE@APR",
|
|
"YE@MAY",
|
|
"YE@JUN",
|
|
"YE@JUL",
|
|
"YE@AUG",
|
|
"YE@SEP",
|
|
"YE@OCT",
|
|
"YE@NOV",
|
|
"YE@DEC",
|
|
"YE@JAN",
|
|
"WOM@1MON",
|
|
"WOM@2MON",
|
|
"WOM@3MON",
|
|
"WOM@4MON",
|
|
"WOM@1TUE",
|
|
"WOM@2TUE",
|
|
"WOM@3TUE",
|
|
"WOM@4TUE",
|
|
"WOM@1WED",
|
|
"WOM@2WED",
|
|
"WOM@3WED",
|
|
"WOM@4WED",
|
|
"WOM@1THU",
|
|
"WOM@2THU",
|
|
"WOM@3THU",
|
|
"WOM@4THU",
|
|
"WOM@1FRI",
|
|
"WOM@2FRI",
|
|
"WOM@3FRI",
|
|
"WOM@4FRI",
|
|
],
|
|
)
|
|
def test_legacy_offset_warnings(offset_func, freq):
|
|
with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
|
|
offset_func(freq)
|
|
|
|
|
|
def test_ms_vs_capital_ms():
|
|
left = _get_offset("ms")
|
|
right = _get_offset("MS")
|
|
|
|
assert left == offsets.Milli()
|
|
assert right == offsets.MonthBegin()
|
|
|
|
|
|
def test_infer_freq_non_nano():
|
|
arr = np.arange(10).astype(np.int64).view("M8[s]")
|
|
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
|
|
res = frequencies.infer_freq(dta)
|
|
assert res == "s"
|
|
|
|
arr2 = arr.view("m8[ms]")
|
|
tda = TimedeltaArray._simple_new(arr2, dtype=arr2.dtype)
|
|
res2 = frequencies.infer_freq(tda)
|
|
assert res2 == "ms"
|
|
|
|
|
|
def test_infer_freq_non_nano_tzaware(tz_aware_fixture):
|
|
tz = tz_aware_fixture
|
|
|
|
dti = date_range("2016-01-01", periods=365, freq="B", tz=tz)
|
|
dta = dti._data.as_unit("s")
|
|
|
|
res = frequencies.infer_freq(dta)
|
|
assert res == "B"
|