keshe/.venv/Lib/site-packages/pandas/tests/test_downstream.py

"""
Testing that we work in the downstream packages
"""
import array
import subprocess
import sys

import numpy as np
import pytest

from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.arrays import (
    DatetimeArray,
    TimedeltaArray,
)


@pytest.fixture
def df():
    """Provide a minimal single-column integer DataFrame for the tests below."""
    column_a = [1, 2, 3]
    frame = DataFrame({"A": column_a})
    return frame


def test_dask(df):
    """Build a dask DataFrame from a pandas one and compute it back."""
    # dask sets "compute.use_numexpr" to False; remember the value seen on
    # entry and put it back afterwards so other tests are not affected
    option_name = "compute.use_numexpr"
    saved_value = pd.get_option(option_name)

    try:
        pytest.importorskip("toolz")
        dask_dataframe = pytest.importorskip("dask.dataframe")

        partitioned = dask_dataframe.from_pandas(df, npartitions=3)
        column = partitioned.A
        assert column is not None
        computed = partitioned.compute()
        assert computed is not None
    finally:
        pd.set_option(option_name, saved_value)


def test_dask_ufunc():
    """Compare ``dask.array.fix`` on a dask Series with ``np.fix`` on pandas."""
    # dask sets "compute.use_numexpr" to False; remember the value seen on
    # entry and put it back afterwards so other tests are not affected
    option_name = "compute.use_numexpr"
    saved_value = pd.get_option(option_name)

    try:
        dask_array = pytest.importorskip("dask.array")
        dask_dataframe = pytest.importorskip("dask.dataframe")

        ser = Series([1.5, 2.3, 3.7, 4.0])
        expected = np.fix(ser)

        lazy_ser = dask_dataframe.from_pandas(ser, npartitions=2)
        result = dask_array.fix(lazy_ser).compute()
        tm.assert_series_equal(result, expected)
    finally:
        pd.set_option(option_name, saved_value)


def test_construct_dask_float_array_int_dtype_match_ndarray():
    """Series built from a float dask array must follow the ndarray rules."""
    # GH#40110 a float-dtype dask array is handled with the same rules
    #  that apply to an ndarray
    dask_dataframe = pytest.importorskip("dask.dataframe")

    values = np.array([1, 2.5, 3])
    lazy_values = dask_dataframe.from_array(values)

    tm.assert_series_equal(Series(lazy_values), Series(values))

    # GH#49599 in 2.0 we raise instead of silently ignoring the dtype
    coerce_msg = "Trying to coerce float values to integers"
    with pytest.raises(ValueError, match=coerce_msg):
        Series(lazy_values, dtype="i8")

    nan_msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
    values[2] = np.nan
    # the dask input first, then the numpy input: both raise the same error
    for data in (lazy_values, values):
        with pytest.raises(IntCastingNaNError, match=nan_msg):
            Series(data, dtype="i8")


def test_xarray(df):
    """Check that ``DataFrame.to_xarray`` returns an object."""
    pytest.importorskip("xarray")

    converted = df.to_xarray()
    assert converted is not None


def test_xarray_cftimeindex_nearest():
    """Nearest-match lookup of a far-future key in a two-element cftime index."""
    # https://github.com/pydata/xarray/issues/3751
    cftime = pytest.importorskip("cftime")
    xarray = pytest.importorskip("xarray")

    index = xarray.cftime_range("0001", periods=2)
    lookup = [cftime.DatetimeGregorian(2000, 1, 1)]
    positions = index.get_indexer(lookup, method="nearest")
    assert positions == 1


@pytest.mark.single_cpu
def test_oo_optimizable():
    """Import pandas in a child interpreter started with ``-OO``."""
    # GH 21071
    command = [sys.executable, "-OO", "-c", "import pandas"]
    subprocess.check_call(command)


@pytest.mark.single_cpu
def test_oo_optimized_datetime_index_unpickle():
    """Pickle round-trip a ``date_range`` in a child interpreter run with ``-OO``."""
    # GH 42866
    snippet = (
        "import pandas as pd, pickle; "
        "pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))"
    )
    subprocess.check_call([sys.executable, "-OO", "-c", snippet])


def test_statsmodels():
    """Fit an OLS formula model on a small DataFrame."""
    smf = pytest.importorskip("statsmodels.formula.api")

    columns = {
        "Lottery": range(5),
        "Literacy": range(5),
        "Pop1831": range(100, 105),
    }
    frame = DataFrame(columns)
    model = smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=frame)
    model.fit()


def test_scikit_learn():
    """Fit an SVC on all but the last digits sample, then predict the last."""
    pytest.importorskip("sklearn")
    from sklearn import (
        datasets,
        svm,
    )

    digits = datasets.load_digits()
    classifier = svm.SVC(gamma=0.001, C=100.0)

    train_features = digits.data[:-1]
    train_labels = digits.target[:-1]
    classifier.fit(train_features, train_labels)

    held_out = digits.data[-1:]
    classifier.predict(held_out)


def test_seaborn():
    """Draw a strip plot from a frame with a datetime column."""
    seaborn = pytest.importorskip("seaborn")

    days = pd.date_range("2023", freq="D", periods=5)
    tips = DataFrame({"day": days, "total_bill": range(5)})
    seaborn.stripplot(x="day", y="total_bill", data=tips)


def test_pandas_datareader():
    """Skip unless pandas_datareader imports; the import is the whole check."""
    module_name = "pandas_datareader"
    pytest.importorskip(module_name)


@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_pyarrow(df):
    """Round-trip a DataFrame through ``pyarrow.Table``."""
    pa = pytest.importorskip("pyarrow")

    roundtripped = pa.Table.from_pandas(df).to_pandas()
    tm.assert_frame_equal(roundtripped, df)


def test_yaml_dump(df):
    """Dump a DataFrame with PyYAML and load it back with two loaders."""
    # GH#42748
    yaml = pytest.importorskip("yaml")

    text = yaml.dump(df)

    # Look each loader up only when it is needed, in the original order.
    for loader_name in ("Loader", "UnsafeLoader"):
        loader = getattr(yaml, loader_name)
        restored = yaml.load(text, Loader=loader)
        tm.assert_frame_equal(df, restored)


@pytest.mark.single_cpu
def test_missing_required_dependency():
    """
    Check that importing pandas with site-packages disabled fails and that
    the captured output names each required dependency.

    Skipped when pandas itself is installed as a site package.
    """
    # GH 23868
    # To ensure proper isolation, we pass these flags
    # -S : disable site-packages
    # -s : disable user site-packages
    # -E : disable PYTHON* env vars, especially PYTHONPATH
    # https://github.com/MacPython/pandas-wheels/pull/50
    import re

    pyexe = sys.executable.replace("\\", "/")

    # We skip this test if pandas is installed as a site package. We first
    # import the package normally and check the path to the module before
    # executing the test which imports pandas with site packages disabled.
    call = [pyexe, "-c", "import pandas;print(pandas.__file__)"]
    output = subprocess.check_output(call).decode()
    if "site-packages" in output:
        pytest.skip("pandas installed as site package")

    # This test will fail if pandas is installed as a site package. The flags
    # prevent pandas being imported and the test will report Failed: DID NOT
    # RAISE <class 'subprocess.CalledProcessError'>
    call = [pyexe, "-sSE", "-c", "import pandas"]

    # CalledProcessError formats the command with "%s", i.e. the list's repr.
    # Build the expected text from ``call`` itself and escape it, so regex
    # metacharacters in the interpreter path (e.g. "(x86)", "+") are matched
    # literally instead of being interpreted as pattern syntax.
    msg = re.escape(f"Command '{call}' returned non-zero exit status 1.")

    with pytest.raises(subprocess.CalledProcessError, match=msg) as exc:
        subprocess.check_output(call, stderr=subprocess.STDOUT)

    output = exc.value.stdout.decode()
    for name in ["numpy", "pytz", "dateutil"]:
        assert name in output


def test_frame_setitem_dask_array_into_new_col():
    """Assign a dask array to two new columns, then overwrite part of one."""
    # GH#47128

    # dask sets "compute.use_numexpr" to False; remember the value seen on
    # entry and put it back afterwards so other tests are not affected
    option_name = "compute.use_numexpr"
    saved_value = pd.get_option(option_name)

    try:
        dask_array = pytest.importorskip("dask.array")

        lazy_values = dask_array.array([1, 2])
        frame = DataFrame({"a": ["a", "b"]})
        for new_col in ("b", "c"):
            frame[new_col] = lazy_values
        frame.loc[[False, True], "b"] = 100

        expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
        tm.assert_frame_equal(frame.loc[[1], :], expected)
    finally:
        pd.set_option(option_name, saved_value)


def test_pandas_priority():
    # GH#48347

    class MyClass:
        __pandas_priority__ = 5000

        def __radd__(self, other):
            return self

    left = MyClass()
    right = Series(range(3))

    assert right.__add__(left) is NotImplemented
    assert right + left is left


@pytest.fixture(
    params=[
        "memoryview",
        "array",
        pytest.param("dask", marks=td.skip_if_no("dask.array")),
        pytest.param("xarray", marks=td.skip_if_no("xarray")),
    ]
)
def array_likes(request):
    """
    Return ``(arr, data)``: a numpy int64 array plus the same values wrapped
    in the container named by the fixture parameter, i.e. a memoryview, an
    ``array.array``, a dask array or an xarray ``DataArray``.
    """
    # GH#24539 recognize e.g xarray, dask, ...
    arr = np.array([1, 2, 3], dtype=np.int64)

    def _as_dask():
        # imported lazily: only reached when the "dask" parameter is active
        import dask.array

        return dask.array.array(arr)

    def _as_xarray():
        # imported lazily: only reached when the "xarray" parameter is active
        import xarray as xr

        return xr.DataArray(arr)

    builders = {
        "memoryview": lambda: memoryview(arr),
        "array": lambda: array.array("i", arr),
        "dask": _as_dask,
        "xarray": _as_xarray,
    }
    data = builders[request.param]()

    return arr, data


@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_from_obscure_array(dtype, array_likes):
    """Array, converter and Index constructors accept the array-like inputs."""
    # GH#24539 recognize e.g xarray, dask, ...
    # Note: PeriodArray is not covered because _from_sequence won't accept
    #  an array of integers
    # TODO: could check with arraylike of Period objects
    arr, data = array_likes

    array_cls, converter, index_cls = {
        "M8[ns]": (DatetimeArray, pd.to_datetime, DatetimeIndex),
        "m8[ns]": (TimedeltaArray, pd.to_timedelta, TimedeltaIndex),
    }[dtype]

    depr_msg = f"{array_cls.__name__}.__init__ is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
        expected = array_cls(arr)
    result = array_cls._from_sequence(data, dtype=dtype)
    tm.assert_extension_array_equal(result, expected)

    if not isinstance(data, memoryview):
        # FIXME(GH#44431) these raise on memoryview and attempted fix
        #  fails on py3.10
        from_ndarray = converter(arr).array
        from_arraylike = converter(data).array
        tm.assert_equal(from_ndarray, from_arraylike)

    # Let's check the Indexes while we're here
    tm.assert_index_equal(index_cls(arr), index_cls(data))


def test_dataframe_consortium() -> None:
    """
    Exercise the dataframe consortium standard entry points on a DataFrame
    and on a Series.

    The full test suite lives at
    https://github.com/data-apis/dataframe-api-compat; this only checks that
    the entry points work as expected.
    """
    pytest.importorskip("dataframe_api_compat")

    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    std_frame = frame.__dataframe_consortium_standard__()
    assert std_frame.get_column_names() == ["a", "b"]

    std_column = Series([1, 2, 3], name="a").__column_consortium_standard__()
    assert std_column.name == "a"


def test_xarray_coerce_unit():
    """``to_datetime`` with ``unit="ns"`` on an xarray DataArray of integers."""
    # GH44053
    xr = pytest.importorskip("xarray")

    stamps = [
        "1970-01-01 00:00:00.000000001",
        "1970-01-01 00:00:00.000000002",
        "1970-01-01 00:00:00.000000003",
    ]
    expected = DatetimeIndex(stamps, dtype="datetime64[ns]", freq=None)

    data_array = xr.DataArray([1, 2, 3])
    result = pd.to_datetime(data_array, unit="ns")
    tm.assert_index_equal(result, expected)
第一次提交 6 months ago			`"""`
			`Testing that we work in the downstream packages`
			`"""`
			`import array`
			`import subprocess`
			`import sys`

			`import numpy as np`
			`import pytest`

			`from pandas.errors import IntCastingNaNError`
			`import pandas.util._test_decorators as td`

			`import pandas as pd`
			`from pandas import (`
			`DataFrame,`
			`DatetimeIndex,`
			`Series,`
			`TimedeltaIndex,`
			`)`
			`import pandas._testing as tm`
			`from pandas.core.arrays import (`
			`DatetimeArray,`
			`TimedeltaArray,`
			`)`


			`@pytest.fixture`
			`def df():`
			`return DataFrame({"A": [1, 2, 3]})`


			`def test_dask(df):`
			`# dask sets "compute.use_numexpr" to False, so catch the current value`
			`# and ensure to reset it afterwards to avoid impacting other tests`
			`olduse = pd.get_option("compute.use_numexpr")`

			`try:`
			`pytest.importorskip("toolz")`
			`dd = pytest.importorskip("dask.dataframe")`

			`ddf = dd.from_pandas(df, npartitions=3)`
			`assert ddf.A is not None`
			`assert ddf.compute() is not None`
			`finally:`
			`pd.set_option("compute.use_numexpr", olduse)`


			`def test_dask_ufunc():`
			`# dask sets "compute.use_numexpr" to False, so catch the current value`
			`# and ensure to reset it afterwards to avoid impacting other tests`
			`olduse = pd.get_option("compute.use_numexpr")`

			`try:`
			`da = pytest.importorskip("dask.array")`
			`dd = pytest.importorskip("dask.dataframe")`

			`s = Series([1.5, 2.3, 3.7, 4.0])`
			`ds = dd.from_pandas(s, npartitions=2)`

			`result = da.fix(ds).compute()`
			`expected = np.fix(s)`
			`tm.assert_series_equal(result, expected)`
			`finally:`
			`pd.set_option("compute.use_numexpr", olduse)`


			`def test_construct_dask_float_array_int_dtype_match_ndarray():`
			`# GH#40110 make sure we treat a float-dtype dask array with the same`
			`# rules we would for an ndarray`
			`dd = pytest.importorskip("dask.dataframe")`

			`arr = np.array([1, 2.5, 3])`
			`darr = dd.from_array(arr)`

			`res = Series(darr)`
			`expected = Series(arr)`
			`tm.assert_series_equal(res, expected)`

			`# GH#49599 in 2.0 we raise instead of silently ignoring the dtype`
			`msg = "Trying to coerce float values to integers"`
			`with pytest.raises(ValueError, match=msg):`
			`Series(darr, dtype="i8")`

			`msg = r"Cannot convert non-finite values \(NA or inf\) to integer"`
			`arr[2] = np.nan`
			`with pytest.raises(IntCastingNaNError, match=msg):`
			`Series(darr, dtype="i8")`
			`# which is the same as we get with a numpy input`
			`with pytest.raises(IntCastingNaNError, match=msg):`
			`Series(arr, dtype="i8")`


			`def test_xarray(df):`
			`pytest.importorskip("xarray")`

			`assert df.to_xarray() is not None`


			`def test_xarray_cftimeindex_nearest():`
			`# https://github.com/pydata/xarray/issues/3751`
			`cftime = pytest.importorskip("cftime")`
			`xarray = pytest.importorskip("xarray")`

			`times = xarray.cftime_range("0001", periods=2)`
			`key = cftime.DatetimeGregorian(2000, 1, 1)`
			`result = times.get_indexer([key], method="nearest")`
			`expected = 1`
			`assert result == expected`


			`@pytest.mark.single_cpu`
			`def test_oo_optimizable():`
			`# GH 21071`
			`subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"])`


			`@pytest.mark.single_cpu`
			`def test_oo_optimized_datetime_index_unpickle():`
			`# GH 42866`
			`subprocess.check_call(`
			`[`
			`sys.executable,`
			`"-OO",`
			`"-c",`
			`(`
			`"import pandas as pd, pickle; "`
			`"pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))"`
			`),`
			`]`
			`)`


			`def test_statsmodels():`
			`smf = pytest.importorskip("statsmodels.formula.api")`

			`df = DataFrame(`
			`{"Lottery": range(5), "Literacy": range(5), "Pop1831": range(100, 105)}`
			`)`
			`smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit()`


			`def test_scikit_learn():`
			`pytest.importorskip("sklearn")`
			`from sklearn import (`
			`datasets,`
			`svm,`
			`)`

			`digits = datasets.load_digits()`
			`clf = svm.SVC(gamma=0.001, C=100.0)`
			`clf.fit(digits.data[:-1], digits.target[:-1])`
			`clf.predict(digits.data[-1:])`


			`def test_seaborn():`
			`seaborn = pytest.importorskip("seaborn")`
			`tips = DataFrame(`
			`{"day": pd.date_range("2023", freq="D", periods=5), "total_bill": range(5)}`
			`)`
			`seaborn.stripplot(x="day", y="total_bill", data=tips)`


			`def test_pandas_datareader():`
			`pytest.importorskip("pandas_datareader")`


			`@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")`
			`def test_pyarrow(df):`
			`pyarrow = pytest.importorskip("pyarrow")`
			`table = pyarrow.Table.from_pandas(df)`
			`result = table.to_pandas()`
			`tm.assert_frame_equal(result, df)`


			`def test_yaml_dump(df):`
			`# GH#42748`
			`yaml = pytest.importorskip("yaml")`

			`dumped = yaml.dump(df)`

			`loaded = yaml.load(dumped, Loader=yaml.Loader)`
			`tm.assert_frame_equal(df, loaded)`

			`loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader)`
			`tm.assert_frame_equal(df, loaded2)`


			`@pytest.mark.single_cpu`
			`def test_missing_required_dependency():`
			`# GH 23868`
			`# To ensure proper isolation, we pass these flags`
			`# -S : disable site-packages`
			`# -s : disable user site-packages`
			`# -E : disable PYTHON* env vars, especially PYTHONPATH`
			`# https://github.com/MacPython/pandas-wheels/pull/50`

			`pyexe = sys.executable.replace("\\", "/")`

			`# We skip this test if pandas is installed as a site package. We first`
			`# import the package normally and check the path to the module before`
			`# executing the test which imports pandas with site packages disabled.`
			`call = [pyexe, "-c", "import pandas;print(pandas.__file__)"]`
			`output = subprocess.check_output(call).decode()`
			`if "site-packages" in output:`
			`pytest.skip("pandas installed as site package")`

			`# This test will fail if pandas is installed as a site package. The flags`
			`# prevent pandas being imported and the test will report Failed: DID NOT`
			`# RAISE <class 'subprocess.CalledProcessError'>`
			`call = [pyexe, "-sSE", "-c", "import pandas"]`

			`msg = (`
			`rf"Command '\['{pyexe}', '-sSE', '-c', 'import pandas'\]' "`
			`"returned non-zero exit status 1."`
			`)`

			`with pytest.raises(subprocess.CalledProcessError, match=msg) as exc:`
			`subprocess.check_output(call, stderr=subprocess.STDOUT)`

			`output = exc.value.stdout.decode()`
			`for name in ["numpy", "pytz", "dateutil"]:`
			`assert name in output`


			`def test_frame_setitem_dask_array_into_new_col():`
			`# GH#47128`

			`# dask sets "compute.use_numexpr" to False, so catch the current value`
			`# and ensure to reset it afterwards to avoid impacting other tests`
			`olduse = pd.get_option("compute.use_numexpr")`

			`try:`
			`da = pytest.importorskip("dask.array")`

			`dda = da.array([1, 2])`
			`df = DataFrame({"a": ["a", "b"]})`
			`df["b"] = dda`
			`df["c"] = dda`
			`df.loc[[False, True], "b"] = 100`
			`result = df.loc[[1], :]`
			`expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])`
			`tm.assert_frame_equal(result, expected)`
			`finally:`
			`pd.set_option("compute.use_numexpr", olduse)`


			`def test_pandas_priority():`
			`# GH#48347`

			`class MyClass:`
			`__pandas_priority__ = 5000`

			`def __radd__(self, other):`
			`return self`

			`left = MyClass()`
			`right = Series(range(3))`

			`assert right.__add__(left) is NotImplemented`
			`assert right + left is left`


			`@pytest.fixture(`
			`params=[`
			`"memoryview",`
			`"array",`
			`pytest.param("dask", marks=td.skip_if_no("dask.array")),`
			`pytest.param("xarray", marks=td.skip_if_no("xarray")),`
			`]`
			`)`
			`def array_likes(request):`
			`"""`
			`Fixture giving a numpy array and a parametrized 'data' object, which can`
			`be a memoryview, array, dask or xarray object created from the numpy array.`
			`"""`
			`# GH#24539 recognize e.g xarray, dask, ...`
			`arr = np.array([1, 2, 3], dtype=np.int64)`

			`name = request.param`
			`if name == "memoryview":`
			`data = memoryview(arr)`
			`elif name == "array":`
			`data = array.array("i", arr)`
			`elif name == "dask":`
			`import dask.array`

			`data = dask.array.array(arr)`
			`elif name == "xarray":`
			`import xarray as xr`

			`data = xr.DataArray(arr)`

			`return arr, data`


			`@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])`
			`def test_from_obscure_array(dtype, array_likes):`
			`# GH#24539 recognize e.g xarray, dask, ...`
			`# Note: we dont do this for PeriodArray bc _from_sequence won't accept`
			`# an array of integers`
			`# TODO: could check with arraylike of Period objects`
			`arr, data = array_likes`

			`cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]`

			`depr_msg = f"{cls.__name__}.__init__ is deprecated"`
			`with tm.assert_produces_warning(FutureWarning, match=depr_msg):`
			`expected = cls(arr)`
			`result = cls._from_sequence(data, dtype=dtype)`
			`tm.assert_extension_array_equal(result, expected)`

			`if not isinstance(data, memoryview):`
			`# FIXME(GH#44431) these raise on memoryview and attempted fix`
			`# fails on py3.10`
			`func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]`
			`result = func(arr).array`
			`expected = func(data).array`
			`tm.assert_equal(result, expected)`

			`# Let's check the Indexes while we're here`
			`idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]`
			`result = idx_cls(arr)`
			`expected = idx_cls(data)`
			`tm.assert_index_equal(result, expected)`


			`def test_dataframe_consortium() -> None:`
			`"""`
			`Test some basic methods of the dataframe consortium standard.`

			`Full testing is done at https://github.com/data-apis/dataframe-api-compat,`
			`this is just to check that the entry point works as expected.`
			`"""`
			`pytest.importorskip("dataframe_api_compat")`
			`df_pd = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})`
			`df = df_pd.__dataframe_consortium_standard__()`
			`result_1 = df.get_column_names()`
			`expected_1 = ["a", "b"]`
			`assert result_1 == expected_1`

			`ser = Series([1, 2, 3], name="a")`
			`col = ser.__column_consortium_standard__()`
			`assert col.name == "a"`


			`def test_xarray_coerce_unit():`
			`# GH44053`
			`xr = pytest.importorskip("xarray")`

			`arr = xr.DataArray([1, 2, 3])`
			`result = pd.to_datetime(arr, unit="ns")`
			`expected = DatetimeIndex(`
			`[`
			`"1970-01-01 00:00:00.000000001",`
			`"1970-01-01 00:00:00.000000002",`
			`"1970-01-01 00:00:00.000000003",`
			`],`
			`dtype="datetime64[ns]",`
			`freq=None,`
			`)`
			`tm.assert_index_equal(result, expected)`