You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
126 lines
4.1 KiB
126 lines
4.1 KiB
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import (
|
|
DataFrame,
|
|
Series,
|
|
array,
|
|
)
|
|
import pandas._testing as tm
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("op", "expected"),
    [
        ("sum", np.int64(3)),
        ("prod", np.int64(2)),
        ("min", np.int64(1)),
        ("max", np.int64(2)),
        ("mean", np.float64(1.5)),
        ("median", np.float64(1.5)),
        ("var", np.float64(0.5)),
        ("std", np.float64(0.5**0.5)),
        ("skew", pd.NA),
        ("kurt", pd.NA),
        ("any", True),
        ("all", True),
    ],
)
def test_series_reductions(op, expected):
    """Check that reduction ``op`` on an Int64 Series ``[1, 2]`` gives ``expected``."""
    values = Series([1, 2], dtype="Int64")
    # Look the reduction up by name so one test body covers every case.
    reduction = getattr(values, op)
    tm.assert_equal(reduction(), expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"op, expected",
|
|
[
|
|
["sum", Series([3], index=["a"], dtype="Int64")],
|
|
["prod", Series([2], index=["a"], dtype="Int64")],
|
|
["min", Series([1], index=["a"], dtype="Int64")],
|
|
["max", Series([2], index=["a"], dtype="Int64")],
|
|
["mean", Series([1.5], index=["a"], dtype="Float64")],
|
|
["median", Series([1.5], index=["a"], dtype="Float64")],
|
|
["var", Series([0.5], index=["a"], dtype="Float64")],
|
|
["std", Series([0.5**0.5], index=["a"], dtype="Float64")],
|
|
["skew", Series([pd.NA], index=["a"], dtype="Float64")],
|
|
["kurt", Series([pd.NA], index=["a"], dtype="Float64")],
|
|
["any", Series([True], index=["a"], dtype="boolean")],
|
|
["all", Series([True], index=["a"], dtype="boolean")],
|
|
],
|
|
)
|
|
def test_dataframe_reductions(op, expected):
|
|
df = DataFrame({"a": array([1, 2], dtype="Int64")})
|
|
result = getattr(df, op)()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"op, expected",
|
|
[
|
|
["sum", array([1, 3], dtype="Int64")],
|
|
["prod", array([1, 3], dtype="Int64")],
|
|
["min", array([1, 3], dtype="Int64")],
|
|
["max", array([1, 3], dtype="Int64")],
|
|
["mean", array([1, 3], dtype="Float64")],
|
|
["median", array([1, 3], dtype="Float64")],
|
|
["var", array([pd.NA], dtype="Float64")],
|
|
["std", array([pd.NA], dtype="Float64")],
|
|
["skew", array([pd.NA], dtype="Float64")],
|
|
["any", array([True, True], dtype="boolean")],
|
|
["all", array([True, True], dtype="boolean")],
|
|
],
|
|
)
|
|
def test_groupby_reductions(op, expected):
|
|
df = DataFrame(
|
|
{
|
|
"A": ["a", "b", "b"],
|
|
"B": array([1, None, 3], dtype="Int64"),
|
|
}
|
|
)
|
|
result = getattr(df.groupby("A"), op)()
|
|
expected = DataFrame(expected, index=pd.Index(["a", "b"], name="A"), columns=["B"])
|
|
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"op, expected",
|
|
[
|
|
["sum", Series([4, 4], index=["B", "C"], dtype="Float64")],
|
|
["prod", Series([3, 3], index=["B", "C"], dtype="Float64")],
|
|
["min", Series([1, 1], index=["B", "C"], dtype="Float64")],
|
|
["max", Series([3, 3], index=["B", "C"], dtype="Float64")],
|
|
["mean", Series([2, 2], index=["B", "C"], dtype="Float64")],
|
|
["median", Series([2, 2], index=["B", "C"], dtype="Float64")],
|
|
["var", Series([2, 2], index=["B", "C"], dtype="Float64")],
|
|
["std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")],
|
|
["skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
|
|
["kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
|
|
["any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
|
|
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
|
|
],
|
|
)
|
|
def test_mixed_reductions(op, expected, using_infer_string):
|
|
if op in ["any", "all"] and using_infer_string:
|
|
expected = expected.astype("bool")
|
|
df = DataFrame(
|
|
{
|
|
"A": ["a", "b", "b"],
|
|
"B": [1, None, 3],
|
|
"C": array([1, None, 3], dtype="Int64"),
|
|
}
|
|
)
|
|
|
|
# series
|
|
result = getattr(df.C, op)()
|
|
tm.assert_equal(result, expected["C"])
|
|
|
|
# frame
|
|
if op in ["any", "all"]:
|
|
result = getattr(df, op)()
|
|
else:
|
|
result = getattr(df, op)(numeric_only=True)
|
|
tm.assert_series_equal(result, expected)
|