keshe/.venv/Lib/site-packages/pandas/tests/series/test_reductions.py

import numpy as np
import pytest

import pandas as pd
from pandas import Series
import pandas._testing as tm


@pytest.mark.parametrize("operation, expected", [("min", "a"), ("max", "b")])
def test_reductions_series_strings(operation, expected):
    # GH#31746
    ser = Series(["a", "b"], dtype="string")
    res_operation_serie = getattr(ser, operation)()
    assert res_operation_serie == expected


@pytest.mark.parametrize("as_period", [True, False])
def test_mode_extension_dtype(as_period):
    # GH#41927 preserve dt64tz dtype
    ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)])

    if as_period:
        ser = ser.dt.to_period("D")
    else:
        ser = ser.dt.tz_localize("US/Central")

    res = ser.mode()
    assert res.dtype == ser.dtype
    tm.assert_series_equal(res, ser)


def test_mode_nullable_dtype(any_numeric_ea_dtype):
    # GH#55340
    ser = Series([1, 3, 2, pd.NA, 3, 2, pd.NA], dtype=any_numeric_ea_dtype)
    result = ser.mode(dropna=False)
    expected = Series([2, 3, pd.NA], dtype=any_numeric_ea_dtype)
    tm.assert_series_equal(result, expected)

    result = ser.mode(dropna=True)
    expected = Series([2, 3], dtype=any_numeric_ea_dtype)
    tm.assert_series_equal(result, expected)

    ser[-1] = pd.NA

    result = ser.mode(dropna=True)
    expected = Series([2, 3], dtype=any_numeric_ea_dtype)
    tm.assert_series_equal(result, expected)

    result = ser.mode(dropna=False)
    expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
    tm.assert_series_equal(result, expected)


def test_mode_infer_string():
    # GH#56183
    pytest.importorskip("pyarrow")
    ser = Series(["a", "b"], dtype=object)
    with pd.option_context("future.infer_string", True):
        result = ser.mode()
    expected = Series(["a", "b"], dtype=object)
    tm.assert_series_equal(result, expected)


def test_reductions_td64_with_nat():
    # GH#8617
    ser = Series([0, pd.NaT], dtype="m8[ns]")
    exp = ser[0]
    assert ser.median() == exp
    assert ser.min() == exp
    assert ser.max() == exp


@pytest.mark.parametrize("skipna", [True, False])
def test_td64_sum_empty(skipna):
    # GH#37151
    ser = Series([], dtype="timedelta64[ns]")

    result = ser.sum(skipna=skipna)
    assert isinstance(result, pd.Timedelta)
    assert result == pd.Timedelta(0)


def test_td64_summation_overflow():
    # GH#9442
    ser = Series(pd.date_range("20130101", periods=100000, freq="h"))
    ser[0] += pd.Timedelta("1s 1ms")

    # mean
    result = (ser - ser.min()).mean()
    expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum())

    # the computation is converted to float so
    # might be some loss of precision
    assert np.allclose(result._value / 1000, expected._value / 1000)

    # sum
    msg = "overflow in timedelta operation"
    with pytest.raises(ValueError, match=msg):
        (ser - ser.min()).sum()

    s1 = ser[0:10000]
    with pytest.raises(ValueError, match=msg):
        (s1 - s1.min()).sum()
    s2 = ser[0:1000]
    (s2 - s2.min()).sum()


def test_prod_numpy16_bug():
    ser = Series([1.0, 1.0, 1.0], index=range(3))
    result = ser.prod()

    assert not isinstance(result, Series)


@pytest.mark.parametrize("func", [np.any, np.all])
@pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}])
def test_validate_any_all_out_keepdims_raises(kwargs, func):
    ser = Series([1, 2])
    param = next(iter(kwargs))
    name = func.__name__

    msg = (
        f"the '{param}' parameter is not "
        "supported in the pandas "
        rf"implementation of {name}\(\)"
    )
    with pytest.raises(ValueError, match=msg):
        func(ser, **kwargs)


def test_validate_sum_initial():
    ser = Series([1, 2])
    msg = (
        r"the 'initial' parameter is not "
        r"supported in the pandas "
        r"implementation of sum\(\)"
    )
    with pytest.raises(ValueError, match=msg):
        np.sum(ser, initial=10)


def test_validate_median_initial():
    ser = Series([1, 2])
    msg = (
        r"the 'overwrite_input' parameter is not "
        r"supported in the pandas "
        r"implementation of median\(\)"
    )
    with pytest.raises(ValueError, match=msg):
        # It seems like np.median doesn't dispatch, so we use the
        # method instead of the ufunc.
        ser.median(overwrite_input=True)


def test_validate_stat_keepdims():
    ser = Series([1, 2])
    msg = (
        r"the 'keepdims' parameter is not "
        r"supported in the pandas "
        r"implementation of sum\(\)"
    )
    with pytest.raises(ValueError, match=msg):
        np.sum(ser, keepdims=True)


def test_mean_with_convertible_string_raises(using_array_manager, using_infer_string):
    # GH#44008
    ser = Series(["1", "2"])
    if using_infer_string:
        msg = "does not support"
        with pytest.raises(TypeError, match=msg):
            ser.sum()
    else:
        assert ser.sum() == "12"
    msg = "Could not convert string '12' to numeric|does not support"
    with pytest.raises(TypeError, match=msg):
        ser.mean()

    df = ser.to_frame()
    if not using_array_manager:
        msg = r"Could not convert \['12'\] to numeric|does not support"
    with pytest.raises(TypeError, match=msg):
        df.mean()


def test_mean_dont_convert_j_to_complex(using_array_manager):
    # GH#36703
    df = pd.DataFrame([{"db": "J", "numeric": 123}])
    if using_array_manager:
        msg = "Could not convert string 'J' to numeric"
    else:
        msg = r"Could not convert \['J'\] to numeric|does not support"
    with pytest.raises(TypeError, match=msg):
        df.mean()

    with pytest.raises(TypeError, match=msg):
        df.agg("mean")

    msg = "Could not convert string 'J' to numeric|does not support"
    with pytest.raises(TypeError, match=msg):
        df["db"].mean()
    msg = "Could not convert string 'J' to numeric|ufunc 'divide'"
    with pytest.raises(TypeError, match=msg):
        np.mean(df["db"].astype("string").array)


def test_median_with_convertible_string_raises(using_array_manager):
    # GH#34671 this _could_ return a string "2", but definitely not float 2.0
    msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support"
    ser = Series(["1", "2", "3"])
    with pytest.raises(TypeError, match=msg):
        ser.median()

    if not using_array_manager:
        msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support"
    df = ser.to_frame()
    with pytest.raises(TypeError, match=msg):
        df.median()
第一次提交 6 months ago			`import numpy as np`
			`import pytest`

			`import pandas as pd`
			`from pandas import Series`
			`import pandas._testing as tm`


			`@pytest.mark.parametrize("operation, expected", [("min", "a"), ("max", "b")])`
			`def test_reductions_series_strings(operation, expected):`
			`# GH#31746`
			`ser = Series(["a", "b"], dtype="string")`
			`res_operation_serie = getattr(ser, operation)()`
			`assert res_operation_serie == expected`


			`@pytest.mark.parametrize("as_period", [True, False])`
			`def test_mode_extension_dtype(as_period):`
			`# GH#41927 preserve dt64tz dtype`
			`ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)])`

			`if as_period:`
			`ser = ser.dt.to_period("D")`
			`else:`
			`ser = ser.dt.tz_localize("US/Central")`

			`res = ser.mode()`
			`assert res.dtype == ser.dtype`
			`tm.assert_series_equal(res, ser)`


			`def test_mode_nullable_dtype(any_numeric_ea_dtype):`
			`# GH#55340`
			`ser = Series([1, 3, 2, pd.NA, 3, 2, pd.NA], dtype=any_numeric_ea_dtype)`
			`result = ser.mode(dropna=False)`
			`expected = Series([2, 3, pd.NA], dtype=any_numeric_ea_dtype)`
			`tm.assert_series_equal(result, expected)`

			`result = ser.mode(dropna=True)`
			`expected = Series([2, 3], dtype=any_numeric_ea_dtype)`
			`tm.assert_series_equal(result, expected)`

			`ser[-1] = pd.NA`

			`result = ser.mode(dropna=True)`
			`expected = Series([2, 3], dtype=any_numeric_ea_dtype)`
			`tm.assert_series_equal(result, expected)`

			`result = ser.mode(dropna=False)`
			`expected = Series([pd.NA], dtype=any_numeric_ea_dtype)`
			`tm.assert_series_equal(result, expected)`


			`def test_mode_infer_string():`
			`# GH#56183`
			`pytest.importorskip("pyarrow")`
			`ser = Series(["a", "b"], dtype=object)`
			`with pd.option_context("future.infer_string", True):`
			`result = ser.mode()`
			`expected = Series(["a", "b"], dtype=object)`
			`tm.assert_series_equal(result, expected)`


			`def test_reductions_td64_with_nat():`
			`# GH#8617`
			`ser = Series([0, pd.NaT], dtype="m8[ns]")`
			`exp = ser[0]`
			`assert ser.median() == exp`
			`assert ser.min() == exp`
			`assert ser.max() == exp`


			`@pytest.mark.parametrize("skipna", [True, False])`
			`def test_td64_sum_empty(skipna):`
			`# GH#37151`
			`ser = Series([], dtype="timedelta64[ns]")`

			`result = ser.sum(skipna=skipna)`
			`assert isinstance(result, pd.Timedelta)`
			`assert result == pd.Timedelta(0)`


			`def test_td64_summation_overflow():`
			`# GH#9442`
			`ser = Series(pd.date_range("20130101", periods=100000, freq="h"))`
			`ser[0] += pd.Timedelta("1s 1ms")`

			`# mean`
			`result = (ser - ser.min()).mean()`
			`expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum())`

			`# the computation is converted to float so`
			`# might be some loss of precision`
			`assert np.allclose(result._value / 1000, expected._value / 1000)`

			`# sum`
			`msg = "overflow in timedelta operation"`
			`with pytest.raises(ValueError, match=msg):`
			`(ser - ser.min()).sum()`

			`s1 = ser[0:10000]`
			`with pytest.raises(ValueError, match=msg):`
			`(s1 - s1.min()).sum()`
			`s2 = ser[0:1000]`
			`(s2 - s2.min()).sum()`


			`def test_prod_numpy16_bug():`
			`ser = Series([1.0, 1.0, 1.0], index=range(3))`
			`result = ser.prod()`

			`assert not isinstance(result, Series)`


			`@pytest.mark.parametrize("func", [np.any, np.all])`
			`@pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}])`
			`def test_validate_any_all_out_keepdims_raises(kwargs, func):`
			`ser = Series([1, 2])`
			`param = next(iter(kwargs))`
			`name = func.__name__`

			`msg = (`
			`f"the '{param}' parameter is not "`
			`"supported in the pandas "`
			`rf"implementation of {name}\(\)"`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`func(ser, **kwargs)`


			`def test_validate_sum_initial():`
			`ser = Series([1, 2])`
			`msg = (`
			`r"the 'initial' parameter is not "`
			`r"supported in the pandas "`
			`r"implementation of sum\(\)"`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`np.sum(ser, initial=10)`


			`def test_validate_median_initial():`
			`ser = Series([1, 2])`
			`msg = (`
			`r"the 'overwrite_input' parameter is not "`
			`r"supported in the pandas "`
			`r"implementation of median\(\)"`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`# It seems like np.median doesn't dispatch, so we use the`
			`# method instead of the ufunc.`
			`ser.median(overwrite_input=True)`


			`def test_validate_stat_keepdims():`
			`ser = Series([1, 2])`
			`msg = (`
			`r"the 'keepdims' parameter is not "`
			`r"supported in the pandas "`
			`r"implementation of sum\(\)"`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`np.sum(ser, keepdims=True)`


			`def test_mean_with_convertible_string_raises(using_array_manager, using_infer_string):`
			`# GH#44008`
			`ser = Series(["1", "2"])`
			`if using_infer_string:`
			`msg = "does not support"`
			`with pytest.raises(TypeError, match=msg):`
			`ser.sum()`
			`else:`
			`assert ser.sum() == "12"`
			`msg = "Could not convert string '12' to numeric\|does not support"`
			`with pytest.raises(TypeError, match=msg):`
			`ser.mean()`

			`df = ser.to_frame()`
			`if not using_array_manager:`
			`msg = r"Could not convert \['12'\] to numeric\|does not support"`
			`with pytest.raises(TypeError, match=msg):`
			`df.mean()`


			`def test_mean_dont_convert_j_to_complex(using_array_manager):`
			`# GH#36703`
			`df = pd.DataFrame([{"db": "J", "numeric": 123}])`
			`if using_array_manager:`
			`msg = "Could not convert string 'J' to numeric"`
			`else:`
			`msg = r"Could not convert \['J'\] to numeric\|does not support"`
			`with pytest.raises(TypeError, match=msg):`
			`df.mean()`

			`with pytest.raises(TypeError, match=msg):`
			`df.agg("mean")`

			`msg = "Could not convert string 'J' to numeric\|does not support"`
			`with pytest.raises(TypeError, match=msg):`
			`df["db"].mean()`
			`msg = "Could not convert string 'J' to numeric\|ufunc 'divide'"`
			`with pytest.raises(TypeError, match=msg):`
			`np.mean(df["db"].astype("string").array)`


			`def test_median_with_convertible_string_raises(using_array_manager):`
			`# GH#34671 this _could_ return a string "2", but definitely not float 2.0`
			`msg = r"Cannot convert \['1' '2' '3'\] to numeric\|does not support"`
			`ser = Series(["1", "2", "3"])`
			`with pytest.raises(TypeError, match=msg):`
			`ser.median()`

			`if not using_array_manager:`
			`msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric\|does not support"`
			`df = ser.to_frame()`
			`with pytest.raises(TypeError, match=msg):`
			`df.median()`