You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

379 lines
12 KiB

from datetime import (
date,
timedelta,
)
import numpy as np
import pytest
from pandas._libs.tslibs.timezones import maybe_get_tz
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
_maybe_remove,
ensure_clean_store,
)
def _compare_with_tz(a, b):
tm.assert_frame_equal(a, b)
# compare the zones on each element
for c in a.columns:
for i in a.index:
a_e = a.loc[i, c]
b_e = b.loc[i, c]
if not (a_e == b_e and a_e.tz == b_e.tz):
raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")
# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
# filename issues.
gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x)
gettz_pytz = lambda x: x
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones(setup_path, gettz):
# as columns
# Single-tzinfo, no DST transition
df_est = DataFrame(
{
"A": [
Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")).as_unit("ns")
+ timedelta(hours=1) * i
for i in range(5)
]
}
)
# frame with all columns having same tzinfo, but different sides
# of DST transition
df_crosses_dst = DataFrame(
{
"A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
"B": Timestamp("20130603", tz=gettz("US/Eastern")).as_unit("ns"),
},
index=range(5),
)
df_mixed_tz = DataFrame(
{
"A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
"B": Timestamp("20130102", tz=gettz("EET")).as_unit("ns"),
},
index=range(5),
)
df_different_tz = DataFrame(
{
"A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
"B": Timestamp("20130102", tz=gettz("CET")).as_unit("ns"),
},
index=range(5),
)
with ensure_clean_store(setup_path) as store:
_maybe_remove(store, "df_tz")
store.append("df_tz", df_est, data_columns=["A"])
result = store["df_tz"]
_compare_with_tz(result, df_est)
tm.assert_frame_equal(result, df_est)
# select with tz aware
expected = df_est[df_est.A >= df_est.A[3]]
result = store.select("df_tz", where="A>=df_est.A[3]")
_compare_with_tz(result, expected)
# ensure we include dates in DST and STD time here.
_maybe_remove(store, "df_tz")
store.append("df_tz", df_crosses_dst)
result = store["df_tz"]
_compare_with_tz(result, df_crosses_dst)
tm.assert_frame_equal(result, df_crosses_dst)
msg = (
r"invalid info for \[values_block_1\] for \[tz\], "
r"existing_value \[(dateutil/.*)?US/Eastern\] "
r"conflicts with new value \[(dateutil/.*)?EET\]"
)
with pytest.raises(ValueError, match=msg):
store.append("df_tz", df_mixed_tz)
# this is ok
_maybe_remove(store, "df_tz")
store.append("df_tz", df_mixed_tz, data_columns=["A", "B"])
result = store["df_tz"]
_compare_with_tz(result, df_mixed_tz)
tm.assert_frame_equal(result, df_mixed_tz)
# can't append with diff timezone
msg = (
r"invalid info for \[B\] for \[tz\], "
r"existing_value \[(dateutil/.*)?EET\] "
r"conflicts with new value \[(dateutil/.*)?CET\]"
)
with pytest.raises(ValueError, match=msg):
store.append("df_tz", df_different_tz)
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones_as_index(setup_path, gettz):
# GH#4098 example
dti = date_range("2000-1-1", periods=3, freq="h", tz=gettz("US/Eastern"))
dti = dti._with_freq(None) # freq doesn't round-trip
df = DataFrame({"A": Series(range(3), index=dti)})
with ensure_clean_store(setup_path) as store:
_maybe_remove(store, "df")
store.put("df", df)
result = store.select("df")
tm.assert_frame_equal(result, df)
_maybe_remove(store, "df")
store.append("df", df)
result = store.select("df")
tm.assert_frame_equal(result, df)
def test_roundtrip_tz_aware_index(setup_path, unit):
# GH 17618
ts = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
dti = DatetimeIndex([ts]).as_unit(unit)
df = DataFrame(data=[0], index=dti)
with ensure_clean_store(setup_path) as store:
store.put("frame", df, format="fixed")
recons = store["frame"]
tm.assert_frame_equal(recons, df)
value = recons.index[0]._value
denom = {"ns": 1, "us": 1000, "ms": 10**6, "s": 10**9}[unit]
assert value == 946706400000000000 // denom
def test_store_index_name_with_tz(setup_path):
# GH 13884
df = DataFrame({"A": [1, 2]})
df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
df.index = df.index.tz_localize("UTC")
df.index.name = "foo"
with ensure_clean_store(setup_path) as store:
store.put("frame", df, format="table")
recons = store["frame"]
tm.assert_frame_equal(recons, df)
def test_tseries_select_index_column(setup_path):
# GH7777
# selecting a UTC datetimeindex column did
# not preserve UTC tzinfo set before storing
# check that no tz still works
rng = date_range("1/1/2000", "1/30/2000")
frame = DataFrame(
np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
)
with ensure_clean_store(setup_path) as store:
store.append("frame", frame)
result = store.select_column("frame", "index")
assert rng.tz == DatetimeIndex(result.values).tz
# check utc
rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
frame = DataFrame(
np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
)
with ensure_clean_store(setup_path) as store:
store.append("frame", frame)
result = store.select_column("frame", "index")
assert rng.tz == result.dt.tz
# double check non-utc
rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
frame = DataFrame(
np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
)
with ensure_clean_store(setup_path) as store:
store.append("frame", frame)
result = store.select_column("frame", "index")
assert rng.tz == result.dt.tz
def test_timezones_fixed_format_frame_non_empty(setup_path):
with ensure_clean_store(setup_path) as store:
# index
rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
rng = rng._with_freq(None) # freq doesn't round-trip
df = DataFrame(
np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
)
store["df"] = df
result = store["df"]
tm.assert_frame_equal(result, df)
# as data
# GH11411
_maybe_remove(store, "df")
df = DataFrame(
{
"A": rng,
"B": rng.tz_convert("UTC").tz_localize(None),
"C": rng.tz_convert("CET"),
"D": range(len(rng)),
},
index=rng,
)
store["df"] = df
result = store["df"]
tm.assert_frame_equal(result, df)
def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
# GH 20594
dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)
obj = Series(dtype=dtype, name="A")
if frame_or_series is DataFrame:
obj = obj.to_frame()
with ensure_clean_store(setup_path) as store:
store["obj"] = obj
result = store["obj"]
tm.assert_equal(result, obj)
def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
# GH 20594
dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)
with ensure_clean_store(setup_path) as store:
s = Series([0], dtype=dtype)
store["s"] = s
result = store["s"]
tm.assert_series_equal(result, s)
def test_fixed_offset_tz(setup_path):
rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
frame = DataFrame(
np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
)
with ensure_clean_store(setup_path) as store:
store["frame"] = frame
recons = store["frame"]
tm.assert_index_equal(recons.index, rng)
assert rng.tz == recons.index.tz
@td.skip_if_windows
def test_store_timezone(setup_path):
# GH2852
# issue storing datetime.date with a timezone as it resets when read
# back in a new timezone
# original method
with ensure_clean_store(setup_path) as store:
today = date(2013, 9, 10)
df = DataFrame([1, 2, 3], index=[today, today, today])
store["obj1"] = df
result = store["obj1"]
tm.assert_frame_equal(result, df)
# with tz setting
with ensure_clean_store(setup_path) as store:
with tm.set_timezone("EST5EDT"):
today = date(2013, 9, 10)
df = DataFrame([1, 2, 3], index=[today, today, today])
store["obj1"] = df
with tm.set_timezone("CST6CDT"):
result = store["obj1"]
tm.assert_frame_equal(result, df)
def test_legacy_datetimetz_object(datapath):
# legacy from < 0.17.0
# 8260
expected = DataFrame(
{
"A": Timestamp("20130102", tz="US/Eastern").as_unit("ns"),
"B": Timestamp("20130603", tz="CET").as_unit("ns"),
},
index=range(5),
)
with ensure_clean_store(
datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
) as store:
result = store["df"]
tm.assert_frame_equal(result, expected)
def test_dst_transitions(setup_path):
# make sure we are not failing on transitions
with ensure_clean_store(setup_path) as store:
times = date_range(
"2013-10-26 23:00",
"2013-10-27 01:00",
tz="Europe/London",
freq="h",
ambiguous="infer",
)
times = times._with_freq(None) # freq doesn't round-trip
for i in [times, times + pd.Timedelta("10min")]:
_maybe_remove(store, "df")
df = DataFrame({"A": range(len(i)), "B": i}, index=i)
store.append("df", df)
result = store.select("df")
tm.assert_frame_equal(result, df)
def test_read_with_where_tz_aware_index(tmp_path, setup_path):
# GH 11926
periods = 10
dts = date_range("20151201", periods=periods, freq="D", tz="UTC")
mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
expected = DataFrame({"MYCOL": 0}, index=mi)
key = "mykey"
path = tmp_path / setup_path
with pd.HDFStore(path) as store:
store.append(key, expected, format="table", append=True)
result = pd.read_hdf(path, key, where="DATE > 20151130")
tm.assert_frame_equal(result, expected)
def test_py2_created_with_datetimez(datapath):
# The test HDF5 file was created in Python 2, but could not be read in
# Python 3.
#
# GH26443
index = DatetimeIndex(["2019-01-01T18:00"], dtype="M8[ns, America/New_York]")
expected = DataFrame({"data": 123}, index=index)
with ensure_clean_store(
datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
) as store:
result = store["key"]
tm.assert_frame_equal(result, expected)