You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1966 lines
47 KiB

"""
This file is very long and growing, but it was decided to not split it yet, as
it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989
Instead of splitting it was decided to define sections here:
- Configuration / Settings
- Autouse fixtures
- Common arguments
- Missing values & co.
- Classes
- Indices
- Series'
- DataFrames
- Operators & Operations
- Data sets/files
- Time zones
- Dtypes
- Misc
"""
from __future__ import annotations
from collections import abc
from datetime import (
date,
datetime,
time,
timedelta,
timezone,
)
from decimal import Decimal
import operator
import os
from typing import (
TYPE_CHECKING,
Callable,
)
from dateutil.tz import (
tzlocal,
tzutc,
)
import hypothesis
from hypothesis import strategies as st
import numpy as np
import pytest
from pytz import (
FixedOffset,
utc,
)
from pandas._config.config import _get_option
import pandas.util._test_decorators as td
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
IntervalDtype,
)
import pandas as pd
from pandas import (
CategoricalIndex,
DataFrame,
Interval,
IntervalIndex,
Period,
RangeIndex,
Series,
Timedelta,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core import ops
from pandas.core.indexes.api import (
Index,
MultiIndex,
)
from pandas.util.version import Version
if TYPE_CHECKING:
from collections.abc import (
Hashable,
Iterator,
)
try:
import pyarrow as pa
except ImportError:
has_pyarrow = False
else:
del pa
has_pyarrow = True
import zoneinfo
try:
zoneinfo.ZoneInfo("UTC")
except zoneinfo.ZoneInfoNotFoundError:
zoneinfo = None # type: ignore[assignment]
# ----------------------------------------------------------------
# Configuration / Settings
# ----------------------------------------------------------------
# pytest
def pytest_addoption(parser) -> None:
parser.addoption(
"--no-strict-data-files",
action="store_false",
help="Don't fail if a test is skipped for missing data file.",
)
def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
"""Ignore doctest warning.
Parameters
----------
item : pytest.Item
pytest test item.
path : str
Module path to Python object, e.g. "pandas.core.frame.DataFrame.append". A
warning will be filtered when item.name ends with in given path. So it is
sufficient to specify e.g. "DataFrame.append".
message : str
Message to be filtered.
"""
if item.name.endswith(path):
item.add_marker(pytest.mark.filterwarnings(f"ignore:{message}"))
def pytest_collection_modifyitems(items, config) -> None:
is_doctest = config.getoption("--doctest-modules") or config.getoption(
"--doctest-cython", default=False
)
# Warnings from doctests that can be ignored; place reason in comment above.
# Each entry specifies (path, message) - see the ignore_doctest_warning function
ignored_doctest_warnings = [
("is_int64_dtype", "is_int64_dtype is deprecated"),
("is_interval_dtype", "is_interval_dtype is deprecated"),
("is_period_dtype", "is_period_dtype is deprecated"),
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
("is_sparse", "is_sparse is deprecated"),
("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
("NDFrame.replace", "The 'method' keyword"),
("NDFrame.replace", "Series.replace without 'value'"),
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
("Series.idxmin", "The behavior of Series.idxmin"),
("Series.idxmax", "The behavior of Series.idxmax"),
("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
# Docstring divides by zero to show behavior difference
("missing.mask_zero_div_zero", "divide by zero encountered"),
(
"to_pydatetime",
"The behavior of DatetimeProperties.to_pydatetime is deprecated",
),
(
"pandas.core.generic.NDFrame.bool",
"(Series|DataFrame).bool is now deprecated and will be removed "
"in future version of pandas",
),
(
"pandas.core.generic.NDFrame.first",
"first is deprecated and will be removed in a future version. "
"Please create a mask and filter using `.loc` instead",
),
(
"Resampler.fillna",
"DatetimeIndexResampler.fillna is deprecated",
),
(
"DataFrameGroupBy.fillna",
"DataFrameGroupBy.fillna with 'method' is deprecated",
),
(
"DataFrameGroupBy.fillna",
"DataFrame.fillna with 'method' is deprecated",
),
("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
]
if is_doctest:
for item in items:
for path, message in ignored_doctest_warnings:
ignore_doctest_warning(item, path, message)
hypothesis_health_checks = [hypothesis.HealthCheck.too_slow]
if Version(hypothesis.__version__) >= Version("6.83.2"):
hypothesis_health_checks.append(hypothesis.HealthCheck.differing_executors)
# Hypothesis
hypothesis.settings.register_profile(
"ci",
# Hypothesis timing checks are tuned for scalars by default, so we bump
# them from 200ms to 500ms per test case as the global default. If this
# is too short for a specific test, (a) try to make it faster, and (b)
# if it really is slow add `@settings(deadline=...)` with a working value,
# or `deadline=None` to entirely disable timeouts for that test.
# 2022-02-09: Changed deadline from 500 -> None. Deadline leads to
# non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969)
deadline=None,
suppress_health_check=tuple(hypothesis_health_checks),
)
hypothesis.settings.load_profile("ci")
# Registering these strategies makes them globally available via st.from_type,
# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py
for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split():
cls = getattr(pd.tseries.offsets, name)
st.register_type_strategy(
cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans())
)
for name in "YearBegin YearEnd BYearBegin BYearEnd".split():
cls = getattr(pd.tseries.offsets, name)
st.register_type_strategy(
cls,
st.builds(
cls,
n=st.integers(-5, 5),
normalize=st.booleans(),
month=st.integers(min_value=1, max_value=12),
),
)
for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split():
cls = getattr(pd.tseries.offsets, name)
st.register_type_strategy(
cls,
st.builds(
cls,
n=st.integers(-24, 24),
normalize=st.booleans(),
startingMonth=st.integers(min_value=1, max_value=12),
),
)
# ----------------------------------------------------------------
# Autouse fixtures
# ----------------------------------------------------------------
# https://github.com/pytest-dev/pytest/issues/11873
# Would like to avoid autouse=True, but cannot as of pytest 8.0.0
@pytest.fixture(autouse=True)
def add_doctest_imports(doctest_namespace) -> None:
"""
Make `np` and `pd` names available for doctests.
"""
doctest_namespace["np"] = np
doctest_namespace["pd"] = pd
@pytest.fixture(autouse=True)
def configure_tests() -> None:
"""
Configure settings for all tests and test modules.
"""
pd.set_option("chained_assignment", "raise")
# ----------------------------------------------------------------
# Common arguments
# ----------------------------------------------------------------
@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={repr(x)}")
def axis(request):
"""
Fixture for returning the axis numbers of a DataFrame.
"""
return request.param
axis_frame = axis
@pytest.fixture(params=[1, "columns"], ids=lambda x: f"axis={repr(x)}")
def axis_1(request):
"""
Fixture for returning aliases of axis 1 of a DataFrame.
"""
return request.param
@pytest.fixture(params=[True, False, None])
def observed(request):
"""
Pass in the observed keyword to groupby for [True, False]
This indicates whether categoricals should return values for
values which are not in the grouper [False / None], or only values which
appear in the grouper [True]. [None] is supported for future compatibility
if we decide to change the default (and would need to warn if this
parameter is not passed).
"""
return request.param
@pytest.fixture(params=[True, False, None])
def ordered(request):
"""
Boolean 'ordered' parameter for Categorical.
"""
return request.param
@pytest.fixture(params=[True, False])
def skipna(request):
"""
Boolean 'skipna' parameter.
"""
return request.param
@pytest.fixture(params=["first", "last", False])
def keep(request):
"""
Valid values for the 'keep' parameter used in
.duplicated or .drop_duplicates
"""
return request.param
@pytest.fixture(params=["both", "neither", "left", "right"])
def inclusive_endpoints_fixture(request):
"""
Fixture for trying all interval 'inclusive' parameters.
"""
return request.param
@pytest.fixture(params=["left", "right", "both", "neither"])
def closed(request):
"""
Fixture for trying all interval closed parameters.
"""
return request.param
@pytest.fixture(params=["left", "right", "both", "neither"])
def other_closed(request):
"""
Secondary closed fixture to allow parametrizing over all pairs of closed.
"""
return request.param
@pytest.fixture(
params=[
None,
"gzip",
"bz2",
"zip",
"xz",
"tar",
pytest.param("zstd", marks=td.skip_if_no("zstandard")),
]
)
def compression(request):
"""
Fixture for trying common compression types in compression tests.
"""
return request.param
@pytest.fixture(
params=[
"gzip",
"bz2",
"zip",
"xz",
"tar",
pytest.param("zstd", marks=td.skip_if_no("zstandard")),
]
)
def compression_only(request):
"""
Fixture for trying common compression types in compression tests excluding
uncompressed case.
"""
return request.param
@pytest.fixture(params=[True, False])
def writable(request):
"""
Fixture that an array is writable.
"""
return request.param
@pytest.fixture(params=["inner", "outer", "left", "right"])
def join_type(request):
"""
Fixture for trying all types of join operations.
"""
return request.param
@pytest.fixture(params=["nlargest", "nsmallest"])
def nselect_method(request):
"""
Fixture for trying all nselect methods.
"""
return request.param
# ----------------------------------------------------------------
# Missing values & co.
# ----------------------------------------------------------------
@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__)
def nulls_fixture(request):
"""
Fixture for each null type in pandas.
"""
return request.param
nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture
@pytest.fixture(params=[None, np.nan, pd.NaT])
def unique_nulls_fixture(request):
"""
Fixture for each null type in pandas, each null type exactly once.
"""
return request.param
# Generate cartesian product of unique_nulls_fixture:
unique_nulls_fixture2 = unique_nulls_fixture
@pytest.fixture(params=tm.NP_NAT_OBJECTS, ids=lambda x: type(x).__name__)
def np_nat_fixture(request):
"""
Fixture for each NaT type in numpy.
"""
return request.param
# Generate cartesian product of np_nat_fixture:
np_nat_fixture2 = np_nat_fixture
# ----------------------------------------------------------------
# Classes
# ----------------------------------------------------------------
@pytest.fixture(params=[DataFrame, Series])
def frame_or_series(request):
"""
Fixture to parametrize over DataFrame and Series.
"""
return request.param
@pytest.fixture(params=[Index, Series], ids=["index", "series"])
def index_or_series(request):
"""
Fixture to parametrize over Index and Series, made necessary by a mypy
bug, giving an error:
List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]"
See GH#29725
"""
return request.param
# Generate cartesian product of index_or_series fixture:
index_or_series2 = index_or_series
@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
def index_or_series_or_array(request):
"""
Fixture to parametrize over Index, Series, and ExtensionArray
"""
return request.param
@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__)
def box_with_array(request):
"""
Fixture to test behavior for Index, Series, DataFrame, and pandas Array
classes
"""
return request.param
box_with_array2 = box_with_array
@pytest.fixture
def dict_subclass() -> type[dict]:
"""
Fixture for a dictionary subclass.
"""
class TestSubDict(dict):
def __init__(self, *args, **kwargs) -> None:
dict.__init__(self, *args, **kwargs)
return TestSubDict
@pytest.fixture
def non_dict_mapping_subclass() -> type[abc.Mapping]:
"""
Fixture for a non-mapping dictionary subclass.
"""
class TestNonDictMapping(abc.Mapping):
def __init__(self, underlying_dict) -> None:
self._data = underlying_dict
def __getitem__(self, key):
return self._data.__getitem__(key)
def __iter__(self) -> Iterator:
return self._data.__iter__()
def __len__(self) -> int:
return self._data.__len__()
return TestNonDictMapping
# ----------------------------------------------------------------
# Indices
# ----------------------------------------------------------------
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
"""
DataFrame with 3 level MultiIndex (year, month, day) covering
first 100 business days from 2000-01-01 with random data
"""
tdf = DataFrame(
np.random.default_rng(2).standard_normal((100, 4)),
columns=Index(list("ABCD"), dtype=object),
index=date_range("2000-01-01", periods=100, freq="B"),
)
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
# use int64 Index, to make sure things work
ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels])
ymd.index.set_names(["year", "month", "day"], inplace=True)
return ymd
@pytest.fixture
def lexsorted_two_level_string_multiindex() -> MultiIndex:
"""
2-level MultiIndex, lexsorted, with string names.
"""
return MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
@pytest.fixture
def multiindex_dataframe_random_data(
lexsorted_two_level_string_multiindex,
) -> DataFrame:
"""DataFrame with 2 level MultiIndex with random data"""
index = lexsorted_two_level_string_multiindex
return DataFrame(
np.random.default_rng(2).standard_normal((10, 3)),
index=index,
columns=Index(["A", "B", "C"], name="exp"),
)
def _create_multiindex():
"""
MultiIndex used to test the general functionality of this object
"""
# See Also: tests.multi.conftest.idx
major_axis = Index(["foo", "bar", "baz", "qux"])
minor_axis = Index(["one", "two"])
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ["first", "second"]
return MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names,
verify_integrity=False,
)
def _create_mi_with_dt64tz_level():
"""
MultiIndex with a level that is a tzaware DatetimeIndex.
"""
# GH#8367 round trip with pickle
return MultiIndex.from_product(
[[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
names=["one", "two", "three"],
)
indices_dict = {
"string": Index([f"pandas_{i}" for i in range(100)]),
"datetime": date_range("2020-01-01", periods=100),
"datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"),
"period": period_range("2020-01-01", periods=100, freq="D"),
"timedelta": timedelta_range(start="1 day", periods=100, freq="D"),
"range": RangeIndex(100),
"int8": Index(np.arange(100), dtype="int8"),
"int16": Index(np.arange(100), dtype="int16"),
"int32": Index(np.arange(100), dtype="int32"),
"int64": Index(np.arange(100), dtype="int64"),
"uint8": Index(np.arange(100), dtype="uint8"),
"uint16": Index(np.arange(100), dtype="uint16"),
"uint32": Index(np.arange(100), dtype="uint32"),
"uint64": Index(np.arange(100), dtype="uint64"),
"float32": Index(np.arange(100), dtype="float32"),
"float64": Index(np.arange(100), dtype="float64"),
"bool-object": Index([True, False] * 5, dtype=object),
"bool-dtype": Index([True, False] * 5, dtype=bool),
"complex64": Index(
np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64")
),
"complex128": Index(
np.arange(100, dtype="complex128") + 1.0j * np.arange(100, dtype="complex128")
),
"categorical": CategoricalIndex(list("abcd") * 25),
"interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
"empty": Index([]),
"tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
"mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
"multi": _create_multiindex(),
"repeats": Index([0, 0, 1, 1, 2, 2]),
"nullable_int": Index(np.arange(100), dtype="Int64"),
"nullable_uint": Index(np.arange(100), dtype="UInt16"),
"nullable_float": Index(np.arange(100), dtype="Float32"),
"nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"),
"string-python": Index(
pd.array([f"pandas_{i}" for i in range(100)], dtype="string[python]")
),
}
if has_pyarrow:
idx = Index(pd.array([f"pandas_{i}" for i in range(100)], dtype="string[pyarrow]"))
indices_dict["string-pyarrow"] = idx
@pytest.fixture(params=indices_dict.keys())
def index(request):
"""
Fixture for many "simple" kinds of indices.
These indices are unlikely to cover corner cases, e.g.
- no names
- no NaTs/NaNs
- no values near implementation bounds
- ...
"""
# copy to avoid mutation, e.g. setting .name
return indices_dict[request.param].copy()
# Needed to generate cartesian product of indices
index_fixture2 = index
@pytest.fixture(
params=[
key for key, value in indices_dict.items() if not isinstance(value, MultiIndex)
]
)
def index_flat(request):
"""
index fixture, but excluding MultiIndex cases.
"""
key = request.param
return indices_dict[key].copy()
# Alias so we can test with cartesian product of index_flat
index_flat2 = index_flat
@pytest.fixture(
params=[
key
for key, value in indices_dict.items()
if not (
key.startswith(("int", "uint", "float"))
or key in ["range", "empty", "repeats", "bool-dtype"]
)
and not isinstance(value, MultiIndex)
]
)
def index_with_missing(request):
"""
Fixture for indices with missing values.
Integer-dtype and empty cases are excluded because they cannot hold missing
values.
MultiIndex is excluded because isna() is not defined for MultiIndex.
"""
# GH 35538. Use deep copy to avoid illusive bug on np-dev
# GHA pipeline that writes into indices_dict despite copy
ind = indices_dict[request.param].copy(deep=True)
vals = ind.values.copy()
if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]:
# For setting missing values in the top level of MultiIndex
vals = ind.tolist()
vals[0] = (None,) + vals[0][1:]
vals[-1] = (None,) + vals[-1][1:]
return MultiIndex.from_tuples(vals)
else:
vals[0] = None
vals[-1] = None
return type(ind)(vals)
# ----------------------------------------------------------------
# Series'
# ----------------------------------------------------------------
@pytest.fixture
def string_series() -> Series:
"""
Fixture for Series of floats with Index of unique strings
"""
return Series(
np.arange(30, dtype=np.float64) * 1.1,
index=Index([f"i_{i}" for i in range(30)], dtype=object),
name="series",
)
@pytest.fixture
def object_series() -> Series:
"""
Fixture for Series of dtype object with Index of unique strings
"""
data = [f"foo_{i}" for i in range(30)]
index = Index([f"bar_{i}" for i in range(30)], dtype=object)
return Series(data, index=index, name="objects", dtype=object)
@pytest.fixture
def datetime_series() -> Series:
"""
Fixture for Series of floats with DatetimeIndex
"""
return Series(
np.random.default_rng(2).standard_normal(30),
index=date_range("2000-01-01", periods=30, freq="B"),
name="ts",
)
def _create_series(index):
"""Helper for the _series dict"""
size = len(index)
data = np.random.default_rng(2).standard_normal(size)
return Series(data, index=index, name="a", copy=False)
_series = {
f"series-with-{index_id}-index": _create_series(index)
for index_id, index in indices_dict.items()
}
@pytest.fixture
def series_with_simple_index(index) -> Series:
"""
Fixture for tests on series with changing types of indices.
"""
return _create_series(index)
_narrow_series = {
f"{dtype.__name__}-series": Series(
range(30), index=[f"i-{i}" for i in range(30)], name="a", dtype=dtype
)
for dtype in tm.NARROW_NP_DTYPES
}
_index_or_series_objs = {**indices_dict, **_series, **_narrow_series}
@pytest.fixture(params=_index_or_series_objs.keys())
def index_or_series_obj(request):
"""
Fixture for tests on indexes, series and series with a narrow dtype
copy to avoid mutation, e.g. setting .name
"""
return _index_or_series_objs[request.param].copy(deep=True)
_typ_objects_series = {
f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES
}
_index_or_series_memory_objs = {
**indices_dict,
**_series,
**_narrow_series,
**_typ_objects_series,
}
@pytest.fixture(params=_index_or_series_memory_objs.keys())
def index_or_series_memory_obj(request):
"""
Fixture for tests on indexes, series, series with a narrow dtype and
series with empty objects type
copy to avoid mutation, e.g. setting .name
"""
return _index_or_series_memory_objs[request.param].copy(deep=True)
# ----------------------------------------------------------------
# DataFrames
# ----------------------------------------------------------------
@pytest.fixture
def int_frame() -> DataFrame:
"""
Fixture for DataFrame of ints with index of unique strings
Columns are ['A', 'B', 'C', 'D']
"""
return DataFrame(
np.ones((30, 4), dtype=np.int64),
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD"), dtype=object),
)
@pytest.fixture
def float_frame() -> DataFrame:
"""
Fixture for DataFrame of floats with index of unique strings
Columns are ['A', 'B', 'C', 'D'].
"""
return DataFrame(
np.random.default_rng(2).standard_normal((30, 4)),
index=Index([f"foo_{i}" for i in range(30)]),
columns=Index(list("ABCD")),
)
@pytest.fixture
def rand_series_with_duplicate_datetimeindex() -> Series:
"""
Fixture for Series with a DatetimeIndex that has duplicates.
"""
dates = [
datetime(2000, 1, 2),
datetime(2000, 1, 2),
datetime(2000, 1, 2),
datetime(2000, 1, 3),
datetime(2000, 1, 3),
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 4),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
]
return Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates)
# ----------------------------------------------------------------
# Scalars
# ----------------------------------------------------------------
@pytest.fixture(
params=[
(Interval(left=0, right=5), IntervalDtype("int64", "right")),
(Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")),
(Period("2012-01", freq="M"), "period[M]"),
(Period("2012-02-01", freq="D"), "period[D]"),
(
Timestamp("2011-01-01", tz="US/Eastern"),
DatetimeTZDtype(unit="s", tz="US/Eastern"),
),
(Timedelta(seconds=500), "timedelta64[ns]"),
]
)
def ea_scalar_and_dtype(request):
return request.param
# ----------------------------------------------------------------
# Operators & Operations
# ----------------------------------------------------------------
@pytest.fixture(params=tm.arithmetic_dunder_methods)
def all_arithmetic_operators(request):
"""
Fixture for dunder names for common arithmetic operations.
"""
return request.param
@pytest.fixture(
params=[
operator.add,
ops.radd,
operator.sub,
ops.rsub,
operator.mul,
ops.rmul,
operator.truediv,
ops.rtruediv,
operator.floordiv,
ops.rfloordiv,
operator.mod,
ops.rmod,
operator.pow,
ops.rpow,
operator.eq,
operator.ne,
operator.lt,
operator.le,
operator.gt,
operator.ge,
operator.and_,
ops.rand_,
operator.xor,
ops.rxor,
operator.or_,
ops.ror_,
]
)
def all_binary_operators(request):
"""
Fixture for operator and roperator arithmetic, comparison, and logical ops.
"""
return request.param
@pytest.fixture(
params=[
operator.add,
ops.radd,
operator.sub,
ops.rsub,
operator.mul,
ops.rmul,
operator.truediv,
ops.rtruediv,
operator.floordiv,
ops.rfloordiv,
operator.mod,
ops.rmod,
operator.pow,
ops.rpow,
]
)
def all_arithmetic_functions(request):
"""
Fixture for operator and roperator arithmetic functions.
Notes
-----
This includes divmod and rdivmod, whereas all_arithmetic_operators
does not.
"""
return request.param
_all_numeric_reductions = [
"count",
"sum",
"max",
"min",
"mean",
"prod",
"std",
"var",
"median",
"kurt",
"skew",
"sem",
]
@pytest.fixture(params=_all_numeric_reductions)
def all_numeric_reductions(request):
"""
Fixture for numeric reduction names.
"""
return request.param
_all_boolean_reductions = ["all", "any"]
@pytest.fixture(params=_all_boolean_reductions)
def all_boolean_reductions(request):
"""
Fixture for boolean reduction names.
"""
return request.param
_all_reductions = _all_numeric_reductions + _all_boolean_reductions
@pytest.fixture(params=_all_reductions)
def all_reductions(request):
"""
Fixture for all (boolean + numeric) reduction names.
"""
return request.param
@pytest.fixture(
params=[
operator.eq,
operator.ne,
operator.gt,
operator.ge,
operator.lt,
operator.le,
]
)
def comparison_op(request):
"""
Fixture for operator module comparison functions.
"""
return request.param
@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"])
def compare_operators_no_eq_ne(request):
"""
Fixture for dunder names for compare operations except == and !=
* >=
* >
* <
* <=
"""
return request.param
@pytest.fixture(
params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"]
)
def all_logical_operators(request):
"""
Fixture for dunder names for common logical operations
* |
* &
* ^
"""
return request.param
_all_numeric_accumulations = ["cumsum", "cumprod", "cummin", "cummax"]
@pytest.fixture(params=_all_numeric_accumulations)
def all_numeric_accumulations(request):
"""
Fixture for numeric accumulation names
"""
return request.param
# ----------------------------------------------------------------
# Data sets/files
# ----------------------------------------------------------------
@pytest.fixture
def strict_data_files(pytestconfig):
"""
Returns the configuration for the test setting `--no-strict-data-files`.
"""
return pytestconfig.getoption("--no-strict-data-files")
@pytest.fixture
def datapath(strict_data_files: str) -> Callable[..., str]:
"""
Get the path to a data file.
Parameters
----------
path : str
Path to the file, relative to ``pandas/tests/``
Returns
-------
path including ``pandas/tests``.
Raises
------
ValueError
If the path doesn't exist and the --no-strict-data-files option is not set.
"""
BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
def deco(*args):
path = os.path.join(BASE_PATH, *args)
if not os.path.exists(path):
if strict_data_files:
raise ValueError(
f"Could not find file {path} and --no-strict-data-files is not set."
)
pytest.skip(f"Could not find {path}.")
return path
return deco
# ----------------------------------------------------------------
# Time zones
# ----------------------------------------------------------------
TIMEZONES = [
None,
"UTC",
"US/Eastern",
"Asia/Tokyo",
"dateutil/US/Pacific",
"dateutil/Asia/Singapore",
"+01:15",
"-02:15",
"UTC+01:15",
"UTC-02:15",
tzutc(),
tzlocal(),
FixedOffset(300),
FixedOffset(0),
FixedOffset(-300),
timezone.utc,
timezone(timedelta(hours=1)),
timezone(timedelta(hours=-1), name="foo"),
]
if zoneinfo is not None:
TIMEZONES.extend(
[
zoneinfo.ZoneInfo("US/Pacific"), # type: ignore[list-item]
zoneinfo.ZoneInfo("UTC"), # type: ignore[list-item]
]
)
TIMEZONE_IDS = [repr(i) for i in TIMEZONES]
@td.parametrize_fixture_doc(str(TIMEZONE_IDS))
@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS)
def tz_naive_fixture(request):
"""
Fixture for trying timezones including default (None): {0}
"""
return request.param
@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:]))
@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:])
def tz_aware_fixture(request):
"""
Fixture for trying explicit timezones: {0}
"""
return request.param
# Generate cartesian product of tz_aware_fixture:
tz_aware_fixture2 = tz_aware_fixture
_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]
if zoneinfo is not None:
_UTCS.append(zoneinfo.ZoneInfo("UTC"))
@pytest.fixture(params=_UTCS)
def utc_fixture(request):
"""
Fixture to provide variants of UTC timezone strings and tzinfo objects.
"""
return request.param
utc_fixture2 = utc_fixture
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
"""
datetime64 units we support.
"""
return request.param
unit2 = unit
# ----------------------------------------------------------------
# Dtypes
# ----------------------------------------------------------------
@pytest.fixture(params=tm.STRING_DTYPES)
def string_dtype(request):
"""
Parametrized fixture for string dtypes.
* str
* 'str'
* 'U'
"""
return request.param
@pytest.fixture(
params=[
"string[python]",
pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
]
)
def nullable_string_dtype(request):
"""
Parametrized fixture for string dtypes.
* 'string[python]'
* 'string[pyarrow]'
"""
return request.param
@pytest.fixture(
params=[
"python",
pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
pytest.param("pyarrow_numpy", marks=td.skip_if_no("pyarrow")),
]
)
def string_storage(request):
"""
Parametrized fixture for pd.options.mode.string_storage.
* 'python'
* 'pyarrow'
* 'pyarrow_numpy'
"""
return request.param
@pytest.fixture(
params=[
"numpy_nullable",
pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
]
)
def dtype_backend(request):
"""
Parametrized fixture for pd.options.mode.string_storage.
* 'python'
* 'pyarrow'
"""
return request.param
# Alias so we can test with cartesian product of string_storage
string_storage2 = string_storage
@pytest.fixture(params=tm.BYTES_DTYPES)
def bytes_dtype(request):
"""
Parametrized fixture for bytes dtypes.
* bytes
* 'bytes'
"""
return request.param
@pytest.fixture(params=tm.OBJECT_DTYPES)
def object_dtype(request):
"""
Parametrized fixture for object dtypes.
* object
* 'object'
"""
return request.param
@pytest.fixture(
params=[
"object",
"string[python]",
pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
]
)
def any_string_dtype(request):
"""
Parametrized fixture for string dtypes.
* 'object'
* 'string[python]'
* 'string[pyarrow]'
"""
return request.param
@pytest.fixture(params=tm.DATETIME64_DTYPES)
def datetime64_dtype(request):
"""
Parametrized fixture for datetime64 dtypes.
* 'datetime64[ns]'
* 'M8[ns]'
"""
return request.param
@pytest.fixture(params=tm.TIMEDELTA64_DTYPES)
def timedelta64_dtype(request):
"""
Parametrized fixture for timedelta64 dtypes.
* 'timedelta64[ns]'
* 'm8[ns]'
"""
return request.param
@pytest.fixture
def fixed_now_ts() -> Timestamp:
"""
Fixture emits fixed Timestamp.now()
"""
return Timestamp( # pyright: ignore[reportGeneralTypeIssues]
year=2021, month=1, day=1, hour=12, minute=4, second=13, microsecond=22
)
@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES)
def float_numpy_dtype(request):
"""
Parameterized fixture for float dtypes.
* float
* 'float32'
* 'float64'
"""
return request.param
@pytest.fixture(params=tm.FLOAT_EA_DTYPES)
def float_ea_dtype(request):
"""
Parameterized fixture for float dtypes.
* 'Float32'
* 'Float64'
"""
return request.param
@pytest.fixture(params=tm.ALL_FLOAT_DTYPES)
def any_float_dtype(request):
"""
Parameterized fixture for float dtypes.
* float
* 'float32'
* 'float64'
* 'Float32'
* 'Float64'
"""
return request.param
@pytest.fixture(params=tm.COMPLEX_DTYPES)
def complex_dtype(request):
"""
Parameterized fixture for complex dtypes.
* complex
* 'complex64'
* 'complex128'
"""
return request.param
@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES)
def any_signed_int_numpy_dtype(request):
"""
Parameterized fixture for signed integer dtypes.
* int
* 'int8'
* 'int16'
* 'int32'
* 'int64'
"""
return request.param
@pytest.fixture(params=tm.UNSIGNED_INT_NUMPY_DTYPES)
def any_unsigned_int_numpy_dtype(request):
"""
Parameterized fixture for unsigned integer dtypes.
* 'uint8'
* 'uint16'
* 'uint32'
* 'uint64'
"""
return request.param
@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES)
def any_int_numpy_dtype(request):
"""
Parameterized fixture for any integer dtype.
* int
* 'int8'
* 'uint8'
* 'int16'
* 'uint16'
* 'int32'
* 'uint32'
* 'int64'
* 'uint64'
"""
return request.param
@pytest.fixture(params=tm.ALL_INT_EA_DTYPES)
def any_int_ea_dtype(request):
"""
Parameterized fixture for any nullable integer dtype.
* 'UInt8'
* 'Int8'
* 'UInt16'
* 'Int16'
* 'UInt32'
* 'Int32'
* 'UInt64'
* 'Int64'
"""
return request.param
@pytest.fixture(params=tm.ALL_INT_DTYPES)
def any_int_dtype(request):
"""
Parameterized fixture for any nullable integer dtype.
* int
* 'int8'
* 'uint8'
* 'int16'
* 'uint16'
* 'int32'
* 'uint32'
* 'int64'
* 'uint64'
* 'UInt8'
* 'Int8'
* 'UInt16'
* 'Int16'
* 'UInt32'
* 'Int32'
* 'UInt64'
* 'Int64'
"""
return request.param
@pytest.fixture(params=tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES)
def any_numeric_ea_dtype(request):
"""
Parameterized fixture for any nullable integer dtype and
any float ea dtypes.
* 'UInt8'
* 'Int8'
* 'UInt16'
* 'Int16'
* 'UInt32'
* 'Int32'
* 'UInt64'
* 'Int64'
* 'Float32'
* 'Float64'
"""
return request.param
# Unsupported operand types for + ("List[Union[str, ExtensionDtype, dtype[Any],
# Type[object]]]" and "List[str]")
@pytest.fixture(
params=tm.ALL_INT_EA_DTYPES
+ tm.FLOAT_EA_DTYPES
+ tm.ALL_INT_PYARROW_DTYPES_STR_REPR
+ tm.FLOAT_PYARROW_DTYPES_STR_REPR # type: ignore[operator]
)
def any_numeric_ea_and_arrow_dtype(request):
"""
Parameterized fixture for any nullable integer dtype and
any float ea dtypes.
* 'UInt8'
* 'Int8'
* 'UInt16'
* 'Int16'
* 'UInt32'
* 'Int32'
* 'UInt64'
* 'Int64'
* 'Float32'
* 'Float64'
* 'uint8[pyarrow]'
* 'int8[pyarrow]'
* 'uint16[pyarrow]'
* 'int16[pyarrow]'
* 'uint32[pyarrow]'
* 'int32[pyarrow]'
* 'uint64[pyarrow]'
* 'int64[pyarrow]'
* 'float32[pyarrow]'
* 'float64[pyarrow]'
"""
return request.param
@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES)
def any_signed_int_ea_dtype(request):
"""
Parameterized fixture for any signed nullable integer dtype.
* 'Int8'
* 'Int16'
* 'Int32'
* 'Int64'
"""
return request.param
@pytest.fixture(params=tm.ALL_REAL_NUMPY_DTYPES)
def any_real_numpy_dtype(request):
"""
Parameterized fixture for any (purely) real numeric dtype.
* int
* 'int8'
* 'uint8'
* 'int16'
* 'uint16'
* 'int32'
* 'uint32'
* 'int64'
* 'uint64'
* float
* 'float32'
* 'float64'
"""
return request.param
@pytest.fixture(params=tm.ALL_REAL_DTYPES)
def any_real_numeric_dtype(request):
"""
Parameterized fixture for any (purely) real numeric dtype.
* int
* 'int8'
* 'uint8'
* 'int16'
* 'uint16'
* 'int32'
* 'uint32'
* 'int64'
* 'uint64'
* float
* 'float32'
* 'float64'
and associated ea dtypes.
"""
return request.param
@pytest.fixture(params=tm.ALL_NUMPY_DTYPES)
def any_numpy_dtype(request):
"""
Parameterized fixture for all numpy dtypes.
* bool
* 'bool'
* int
* 'int8'
* 'uint8'
* 'int16'
* 'uint16'
* 'int32'
* 'uint32'
* 'int64'
* 'uint64'
* float
* 'float32'
* 'float64'
* complex
* 'complex64'
* 'complex128'
* str
* 'str'
* 'U'
* bytes
* 'bytes'
* 'datetime64[ns]'
* 'M8[ns]'
* 'timedelta64[ns]'
* 'm8[ns]'
* object
* 'object'
"""
return request.param
@pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES)
def any_real_nullable_dtype(request):
"""
Parameterized fixture for all real dtypes that can hold NA.
* float
* 'float32'
* 'float64'
* 'Float32'
* 'Float64'
* 'UInt8'
* 'UInt16'
* 'UInt32'
* 'UInt64'
* 'Int8'
* 'Int16'
* 'Int32'
* 'Int64'
* 'uint8[pyarrow]'
* 'uint16[pyarrow]'
* 'uint32[pyarrow]'
* 'uint64[pyarrow]'
* 'int8[pyarrow]'
* 'int16[pyarrow]'
* 'int32[pyarrow]'
* 'int64[pyarrow]'
* 'float[pyarrow]'
* 'double[pyarrow]'
"""
return request.param
@pytest.fixture(params=tm.ALL_NUMERIC_DTYPES)
def any_numeric_dtype(request):
"""
Parameterized fixture for all numeric dtypes.
* int
* 'int8'
* 'uint8'
* 'int16'
* 'uint16'
* 'int32'
* 'uint32'
* 'int64'
* 'uint64'
* float
* 'float32'
* 'float64'
* complex
* 'complex64'
* 'complex128'
* 'UInt8'
* 'Int8'
* 'UInt16'
* 'Int16'
* 'UInt32'
* 'Int32'
* 'UInt64'
* 'Int64'
* 'Float32'
* 'Float64'
"""
return request.param
# categoricals are handled separately
_any_skipna_inferred_dtype = [
("string", ["a", np.nan, "c"]),
("string", ["a", pd.NA, "c"]),
("mixed", ["a", pd.NaT, "c"]), # pd.NaT not considered valid by is_string_array
("bytes", [b"a", np.nan, b"c"]),
("empty", [np.nan, np.nan, np.nan]),
("empty", []),
("mixed-integer", ["a", np.nan, 2]),
("mixed", ["a", np.nan, 2.0]),
("floating", [1.0, np.nan, 2.0]),
("integer", [1, np.nan, 2]),
("mixed-integer-float", [1, np.nan, 2.0]),
("decimal", [Decimal(1), np.nan, Decimal(2)]),
("boolean", [True, np.nan, False]),
("boolean", [True, pd.NA, False]),
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
("complex", [1 + 1j, np.nan, 2 + 2j]),
# The following dtype is commented out due to GH 23554
# ('timedelta64', [np.timedelta64(1, 'D'),
# np.nan, np.timedelta64(2, 'D')]),
("timedelta", [timedelta(1), np.nan, timedelta(2)]),
("time", [time(1), np.nan, time(2)]),
("period", [Period(2013), pd.NaT, Period(2018)]),
("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
]
ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id
@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
"""
Fixture for all inferred dtypes from _libs.lib.infer_dtype
The covered (inferred) types are:
* 'string'
* 'empty'
* 'bytes'
* 'mixed'
* 'mixed-integer'
* 'mixed-integer-float'
* 'floating'
* 'integer'
* 'decimal'
* 'boolean'
* 'datetime64'
* 'datetime'
* 'date'
* 'timedelta'
* 'time'
* 'period'
* 'interval'
Returns
-------
inferred_dtype : str
The string for the inferred dtype from _libs.lib.infer_dtype
values : np.ndarray
An array of object dtype that will be inferred to have
`inferred_dtype`
Examples
--------
>>> from pandas._libs import lib
>>>
>>> def test_something(any_skipna_inferred_dtype):
... inferred_dtype, values = any_skipna_inferred_dtype
... # will pass
... assert lib.infer_dtype(values, skipna=True) == inferred_dtype
"""
inferred_dtype, values = request.param
values = np.array(values, dtype=object) # object dtype to avoid casting
# correctness of inference tested in tests/dtypes/test_inference.py
return inferred_dtype, values
# ----------------------------------------------------------------
# Misc
# ----------------------------------------------------------------
@pytest.fixture
def ip():
"""
Get an instance of IPython.InteractiveShell.
Will raise a skip if IPython is not installed.
"""
pytest.importorskip("IPython", minversion="6.0.0")
from IPython.core.interactiveshell import InteractiveShell
# GH#35711 make sure sqlite history file handle is not leaked
from traitlets.config import Config # isort:skip
c = Config()
c.HistoryManager.hist_file = ":memory:"
return InteractiveShell(config=c)
@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
def spmatrix(request):
"""
Yields scipy sparse matrix classes.
"""
sparse = pytest.importorskip("scipy.sparse")
return getattr(sparse, request.param + "_matrix")
@pytest.fixture(
params=[
getattr(pd.offsets, o)
for o in pd.offsets.__all__
if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) and o != "Tick"
]
)
def tick_classes(request):
"""
Fixture for Tick based datetime offsets available for a time series.
"""
return request.param
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
"""
Simple fixture for testing keys in sorting methods.
Tests None (no key) and the identity key.
"""
return request.param
@pytest.fixture(
params=[
("foo", None, None),
("Egon", "Venkman", None),
("NCC1701D", "NCC1701D", "NCC1701D"),
# possibly-matching NAs
(np.nan, np.nan, np.nan),
(np.nan, pd.NaT, None),
(np.nan, pd.NA, None),
(pd.NA, pd.NA, pd.NA),
]
)
def names(request) -> tuple[Hashable, Hashable, Hashable]:
"""
A 3-tuple of names, the first two for operands, the last for a result.
"""
return request.param
@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc])
def indexer_sli(request):
"""
Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__
"""
return request.param
@pytest.fixture(params=[tm.loc, tm.iloc])
def indexer_li(request):
"""
Parametrize over loc.__getitem__, iloc.__getitem__
"""
return request.param
@pytest.fixture(params=[tm.setitem, tm.iloc])
def indexer_si(request):
"""
Parametrize over __setitem__, iloc.__setitem__
"""
return request.param
@pytest.fixture(params=[tm.setitem, tm.loc])
def indexer_sl(request):
"""
Parametrize over __setitem__, loc.__setitem__
"""
return request.param
@pytest.fixture(params=[tm.at, tm.loc])
def indexer_al(request):
"""
Parametrize over at.__setitem__, loc.__setitem__
"""
return request.param
@pytest.fixture(params=[tm.iat, tm.iloc])
def indexer_ial(request):
"""
Parametrize over iat.__setitem__, iloc.__setitem__
"""
return request.param
@pytest.fixture
def using_array_manager() -> bool:
"""
Fixture to check if the array manager is being used.
"""
return _get_option("mode.data_manager", silent=True) == "array"
@pytest.fixture
def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)
@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is in warning mode.
"""
return (
pd.options.mode.copy_on_write == "warn"
and _get_option("mode.data_manager", silent=True) == "block"
)
@pytest.fixture
def using_infer_string() -> bool:
"""
Fixture to check if infer string option is enabled.
"""
return pd.options.future.infer_string is True
warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
if zoneinfo is not None:
warsaws.append(zoneinfo.ZoneInfo("Europe/Warsaw")) # type: ignore[arg-type]
@pytest.fixture(params=warsaws)
def warsaw(request) -> str:
"""
tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo.
"""
return request.param
@pytest.fixture()
def arrow_string_storage():
return ("pyarrow", "pyarrow_numpy")