You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
379 lines
12 KiB
379 lines
12 KiB
from datetime import (
|
|
date,
|
|
timedelta,
|
|
)
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from pandas._libs.tslibs.timezones import maybe_get_tz
|
|
import pandas.util._test_decorators as td
|
|
|
|
import pandas as pd
|
|
from pandas import (
|
|
DataFrame,
|
|
DatetimeIndex,
|
|
Series,
|
|
Timestamp,
|
|
date_range,
|
|
)
|
|
import pandas._testing as tm
|
|
from pandas.tests.io.pytables.common import (
|
|
_maybe_remove,
|
|
ensure_clean_store,
|
|
)
|
|
|
|
|
|
def _compare_with_tz(a, b):
|
|
tm.assert_frame_equal(a, b)
|
|
|
|
# compare the zones on each element
|
|
for c in a.columns:
|
|
for i in a.index:
|
|
a_e = a.loc[i, c]
|
|
b_e = b.loc[i, c]
|
|
if not (a_e == b_e and a_e.tz == b_e.tz):
|
|
raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")
|
|
|
|
|
|
# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
# filename issues.
def gettz_dateutil(x):
    """Resolve zone name ``x`` through ``maybe_get_tz`` with a ``dateutil/`` prefix."""
    return maybe_get_tz("dateutil/" + x)


def gettz_pytz(x):
    """Return the zone name ``x`` unchanged."""
    return x
|
|
|
|
|
|
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones(setup_path, gettz):
    # tz-aware timestamps stored as columns
    eastern = gettz("US/Eastern")
    rows = range(5)

    # Single-tzinfo, no DST transition: five consecutive hourly stamps
    first_hour = Timestamp("20130102 2:00:00", tz=eastern).as_unit("ns")
    df_est = DataFrame({"A": [first_hour + timedelta(hours=1) * i for i in rows]})

    eastern_winter = Timestamp("20130102", tz=eastern).as_unit("ns")

    # frame with all columns having same tzinfo, but different sides
    # of DST transition
    crosses_dst = DataFrame(
        {
            "A": eastern_winter,
            "B": Timestamp("20130603", tz=eastern).as_unit("ns"),
        },
        index=rows,
    )

    # columns in two different zones
    mixed_tz = DataFrame(
        {
            "A": eastern_winter,
            "B": Timestamp("20130102", tz=gettz("EET")).as_unit("ns"),
        },
        index=rows,
    )

    # same layout as mixed_tz, but column B uses yet another zone
    different_tz = DataFrame(
        {
            "A": eastern_winter,
            "B": Timestamp("20130102", tz=gettz("CET")).as_unit("ns"),
        },
        index=rows,
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_est, data_columns=["A"])
        stored = store["df_tz"]
        _compare_with_tz(stored, df_est)
        tm.assert_frame_equal(stored, df_est)

        # select with tz aware
        # NOTE: the where string mentions ``df_est`` by name, so that local
        # must keep its name.
        cutoff = df_est.A[3]
        expected = df_est[df_est.A >= cutoff]
        selected = store.select("df_tz", where="A>=df_est.A[3]")
        _compare_with_tz(selected, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        store.append("df_tz", crosses_dst)
        stored = store["df_tz"]
        _compare_with_tz(stored, crosses_dst)
        tm.assert_frame_equal(stored, crosses_dst)

        # appending a second zone into the existing values block is rejected
        pattern = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?US/Eastern\] "
            r"conflicts with new value \[(dateutil/.*)?EET\]"
        )
        with pytest.raises(ValueError, match=pattern):
            store.append("df_tz", mixed_tz)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", mixed_tz, data_columns=["A", "B"])
        stored = store["df_tz"]
        _compare_with_tz(stored, mixed_tz)
        tm.assert_frame_equal(stored, mixed_tz)

        # can't append with diff timezone
        pattern = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?EET\] "
            r"conflicts with new value \[(dateutil/.*)?CET\]"
        )
        with pytest.raises(ValueError, match=pattern):
            store.append("df_tz", different_tz)
|
|
|
|
|
|
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones_as_index(setup_path, gettz):
    # GH#4098 example

    hourly = date_range("2000-1-1", periods=3, freq="h", tz=gettz("US/Eastern"))
    # freq doesn't round-trip, so drop it before building the frame
    index = hourly._with_freq(None)

    df = DataFrame({"A": Series(range(3), index=index)})

    with ensure_clean_store(setup_path) as store:
        # same check for both write paths: put first, then append
        for write in (store.put, store.append):
            _maybe_remove(store, "df")
            write("df", df)
            tm.assert_frame_equal(store.select("df"), df)
|
|
|
|
|
|
def test_roundtrip_tz_aware_index(setup_path, unit):
    # GH 17618
    stamp = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    index = DatetimeIndex([stamp]).as_unit(unit)
    df = DataFrame(data=[0], index=index)

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="fixed")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)

    # the expected integer is given in nanoseconds and scaled down to the
    # resolution under test
    ns_per_unit = {"ns": 1, "us": 1000, "ms": 10**6, "s": 10**9}
    assert recons.index[0]._value == 946706400000000000 // ns_per_unit[unit]
|
|
|
|
|
|
def test_store_index_name_with_tz(setup_path):
    # GH 13884
    # build the named, UTC-localized index first, then attach it to the frame
    naive = DatetimeIndex([1234567890123456787, 1234567890123456788])
    index = naive.tz_localize("UTC")
    index.name = "foo"
    df = DataFrame({"A": [1, 2]}, index=index)

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        tm.assert_frame_equal(store["frame"], df)
|
|
|
|
|
|
def test_tseries_select_index_column(setup_path):
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # in order: check that no tz still works, check utc, double check non-utc
    for tz in (None, "UTC", "US/Eastern"):
        rng = date_range("1/1/2000", "1/30/2000", tz=tz)
        values = np.random.default_rng(2).standard_normal((len(rng), 4))
        frame = DataFrame(values, index=rng)

        with ensure_clean_store(setup_path) as store:
            store.append("frame", frame)
            result = store.select_column("frame", "index")
            if tz is None:
                # the naive case is checked through the raw values
                assert rng.tz == DatetimeIndex(result.values).tz
            else:
                assert rng.tz == result.dt.tz
|
|
|
|
|
|
def test_timezones_fixed_format_frame_non_empty(setup_path):
    with ensure_clean_store(setup_path) as store:
        # tz-aware values as the index
        eastern = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        rng = eastern._with_freq(None)  # freq doesn't round-trip
        values = np.random.default_rng(2).standard_normal((len(rng), 4))
        index_frame = DataFrame(values, index=rng)
        store["df"] = index_frame
        tm.assert_frame_equal(store["df"], index_frame)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        columns = {
            "A": rng,
            "B": rng.tz_convert("UTC").tz_localize(None),
            "C": rng.tz_convert("CET"),
            "D": range(len(rng)),
        }
        data_frame = DataFrame(columns, index=rng)
        store["df"] = data_frame
        tm.assert_frame_equal(store["df"], data_frame)
|
|
|
|
|
|
def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
    # GH 20594
    # an empty tz-aware Series, promoted to a frame when a frame is requested
    empty = Series(dtype=pd.DatetimeTZDtype(tz=tz_aware_fixture), name="A")
    obj = empty.to_frame() if frame_or_series is DataFrame else empty

    with ensure_clean_store(setup_path) as store:
        store["obj"] = obj
        tm.assert_equal(store["obj"], obj)
|
|
|
|
|
|
def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    # GH 20594
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        ser = Series([0], dtype=tz_dtype)
        store["s"] = ser
        tm.assert_series_equal(store["s"], ser)
|
|
|
|
|
|
def test_fixed_offset_tz(setup_path):
    # index built from strings carrying an explicit -07:00 offset
    rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    values = np.random.default_rng(2).standard_normal((len(rng), 4))
    frame = DataFrame(values, index=rng)

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        stored_index = recons.index
        tm.assert_index_equal(stored_index, rng)
        assert rng.tz == stored_index.tz
|
|
|
|
|
|
@td.skip_if_windows
def test_store_timezone(setup_path):
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:
        day = date(2013, 9, 10)
        df = DataFrame([1, 2, 3], index=[day] * 3)
        store["obj1"] = df
        tm.assert_frame_equal(store["obj1"], df)

    # with tz setting
    with ensure_clean_store(setup_path) as store:
        # write while one zone is set ...
        with tm.set_timezone("EST5EDT"):
            day = date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[day] * 3)
            store["obj1"] = df

        # ... and read back while another one is set
        with tm.set_timezone("CST6CDT"):
            read_back = store["obj1"]

        tm.assert_frame_equal(read_back, df)
|
|
|
|
|
|
def test_legacy_datetimetz_object(datapath):
    # legacy from < 0.17.0
    # 8260
    column_values = {
        "A": Timestamp("20130102", tz="US/Eastern").as_unit("ns"),
        "B": Timestamp("20130603", tz="CET").as_unit("ns"),
    }
    expected = DataFrame(column_values, index=range(5))

    legacy_file = datapath("io", "data", "legacy_hdf", "datetimetz_object.h5")
    with ensure_clean_store(legacy_file, mode="r") as store:
        tm.assert_frame_equal(store["df"], expected)
|
|
|
|
|
|
def test_dst_transitions(setup_path):
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        hourly = date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="h",
            ambiguous="infer",
        )
        times = hourly._with_freq(None)  # freq doesn't round-trip

        # the plain range, then the same range shifted by ten minutes
        candidates = [times, times + pd.Timedelta("10min")]
        for index in candidates:
            _maybe_remove(store, "df")
            df = DataFrame({"A": range(len(index)), "B": index}, index=index)
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)
|
|
|
|
|
|
def test_read_with_where_tz_aware_index(tmp_path, setup_path):
    # GH 11926
    n_rows = 10
    dates = date_range("20151201", periods=n_rows, freq="D", tz="UTC")
    index = pd.MultiIndex.from_arrays([dates, range(n_rows)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=index)

    hdf_key = "mykey"
    hdf_path = tmp_path / setup_path
    with pd.HDFStore(hdf_path) as store:
        store.append(hdf_key, expected, format="table", append=True)

    # every stored date is later than the bound, so all rows are expected back
    result = pd.read_hdf(hdf_path, hdf_key, where="DATE > 20151130")
    tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_py2_created_with_datetimez(datapath):
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    expected = DataFrame(
        {"data": 123},
        index=DatetimeIndex(["2019-01-01T18:00"], dtype="M8[ns, America/New_York]"),
    )

    legacy_file = datapath("io", "data", "legacy_hdf", "gh26443.h5")
    with ensure_clean_store(legacy_file, mode="r") as store:
        tm.assert_frame_equal(store["key"], expected)
|