You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
385 lines
12 KiB
385 lines
12 KiB
8 months ago
|
import operator
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas._testing as tm
|
||
|
from pandas.core import ops
|
||
|
from pandas.core.arrays import FloatingArray
|
||
|
|
||
|
# Basic test for the arithmetic array ops
|
||
|
# -----------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "opname, exp",
    [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    """Check ``+`` and ``*`` between two masked arrays, forward and reflected.

    Each operand has one missing entry; the expected result is missing at
    both of those positions.
    """
    a = pd.array([0, 1, None, 3, 4], dtype=dtype)
    b = pd.array([1, 2, 3, None, 5], dtype=dtype)

    # array / array
    expected = pd.array(exp, dtype=dtype)

    op = getattr(operator, opname)
    result = op(a, b)
    tm.assert_extension_array_equal(result, expected)

    # reflected variant (``radd`` / ``rmul``) looked up on pandas.core.ops
    op = getattr(ops, "r" + opname)
    result = op(a, b)
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_sub(dtype):
    """Check element-wise subtraction; a missing operand gives a missing result."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a - b
    expected = pd.array([1, 1, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_div(dtype):
    """Check true division of two masked arrays.

    The expected result is ``Float64`` regardless of the input dtype, with
    ``1 / 0`` giving ``inf`` and missing operands giving missing results.
    """
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a / b
    expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(zero, negative):
    """Check ``Int64 / zero`` for integer, positive-float and negative-float zero.

    ``negative`` flags the ``-0.0`` case, where the expected values are
    multiplied by -1.
    """
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    a = pd.array([0, 1, -1, None], dtype="Int64")
    result = a / zero
    expected = FloatingArray(
        np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
        np.array([False, False, False, True]),
    )
    if negative:
        expected *= -1
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_floordiv(dtype):
    """Check floor division of two masked arrays, keeping the input dtype."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a // b
    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    expected = pd.array([0, 2, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
    """Check ``1 // Series`` with a zero entry, masked and non-masked.

    The same values are expected from the nullable Series (as ``Float64``)
    and from its NumPy-dtype counterpart (as ``float64``).
    """
    # GH 48223: Aligns with non-masked floordiv
    # but differs from numpy
    # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
    ser = pd.Series([0, 1], dtype=any_int_ea_dtype)
    result = 1 // ser
    expected = pd.Series([np.inf, 1.0], dtype="Float64")
    tm.assert_series_equal(result, expected)

    # repeat with the equivalent non-nullable NumPy dtype
    ser_non_nullable = ser.astype(ser.dtype.numpy_dtype)
    result = 1 // ser_non_nullable
    expected = expected.astype(np.float64)
    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_mod(dtype):
    """Check the modulo of two masked arrays; ``1 % 0`` is expected to be 0."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a % b
    expected = pd.array([0, 0, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_pow_scalar():
    """Check ``Int64 ** scalar`` and ``scalar ** Int64`` for 0, 1, NA and NaN.

    The expectations encode that ``x ** 0`` and ``1 ** x`` are 1 even when
    ``x`` is missing, and that a NaN scalar yields a FloatingArray holding
    NaN values that are not masked.
    """
    a = pd.array([-1, 0, 1, None, 2], dtype="Int64")
    result = a**0
    expected = pd.array([1, 1, 1, 1, 1], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = a**1
    expected = pd.array([-1, 0, 1, None, 2], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = a**pd.NA
    expected = pd.array([None, None, 1, None, None], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = a**np.nan
    expected = FloatingArray(
        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
        np.array([False, False, False, True, False]),
    )
    tm.assert_extension_array_equal(result, expected)

    # reversed
    a = a[1:]  # Can't raise integers to negative powers.

    result = 0**a
    expected = pd.array([1, 0, None, 0], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = 1**a
    expected = pd.array([1, 1, 1, 1], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = pd.NA**a
    expected = pd.array([1, None, None, None], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = np.nan**a
    expected = FloatingArray(
        np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
        np.array([False, False, True, False]),
    )
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_pow_array():
    """Check ``array ** array`` over every pairing of 0, 1 and a missing value."""
    a = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    b = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
    result = a**b
    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_rpow_one_to_na():
    """Check ``ndarray ** Int64`` where every exponent is missing.

    A base of 1.0 is expected to give 1.0; a base of 2.0 is expected to
    stay missing.
    """
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    arr = pd.array([np.nan, np.nan], dtype="Int64")
    result = np.array([1.0, 2.0]) ** arr
    expected = pd.array([1.0, np.nan], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    """Check that adding a zero-dim ndarray matches adding the plain scalar."""
    arr = pd.array([1, None, 2])
    result = arr + np.array(other)
    expected = arr + other
    tm.assert_equal(result, expected)
|
||
|
|
||
|
|
||
|
# Test generic characteristics / errors
|
||
|
# -----------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
    """Check that arithmetic with non-numeric operands raises.

    The operator method named by ``all_arithmetic_operators`` is looked up
    on a Series built from ``data`` and called with a string scalar, a
    Timestamp, a string Series and a datetime Series. The one non-raising
    case is ``__mul__`` / ``__rmul__`` with a string Series when
    ``using_infer_string`` is false, whose result is compared against
    Python string repetition.
    """
    op = all_arithmetic_operators
    s = pd.Series(data)
    # Named ``method`` so the module-level ``ops`` import is not shadowed.
    method = getattr(s, op)

    if using_infer_string:
        import pyarrow as pa

        errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
    else:
        errs = TypeError

    # invalid scalars
    msg = "|".join(
        [
            r"can only perform ops with numeric values",
            r"IntegerArray cannot perform the operation mod",
            r"unsupported operand type",
            r"can only concatenate str \(not \"int\"\) to str",
            "not all arguments converted during string",
            "ufunc '.*' not supported for the input types, and the inputs could not",
            "ufunc '.*' did not contain a loop with signature matching types",
            "Addition/subtraction of integers and integer-arrays with Timestamp",
            "has no kernel",
            "not implemented",
        ]
    )
    with pytest.raises(errs, match=msg):
        method("foo")
    with pytest.raises(errs, match=msg):
        method(pd.Timestamp("20180101"))

    # invalid array-likes
    str_ser = pd.Series("foo", index=s.index)
    if op in ("__mul__", "__rmul__") and not using_infer_string:
        res = method(str_ser)
        expected = pd.Series(["foo" * x for x in data], index=s.index)
        # TODO: doing this fillna to keep tests passing as we make
        #  assert_almost_equal stricter, but the expected with pd.NA seems
        #  more-correct than np.nan here.
        expected = expected.fillna(np.nan)
        tm.assert_series_equal(res, expected)
    else:
        with pytest.raises(errs, match=msg):
            method(str_ser)

    msg = "|".join(
        [
            "can only perform ops with numeric values",
            "cannot perform .* with this index type: DatetimeArray",
            "Addition/subtraction of integers and integer-arrays "
            "with DatetimeArray is no longer supported. *",
            "unsupported operand type",
            r"can only concatenate str \(not \"int\"\) to str",
            "not all arguments converted during string",
            "cannot subtract DatetimeArray from ndarray",
            "has no kernel",
            "not implemented",
        ]
    )
    with pytest.raises(errs, match=msg):
        method(pd.Series(pd.date_range("20180101", periods=len(s))))
|
||
|
|
||
|
|
||
|
# Various
|
||
|
# -----------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
# TODO test unsigned overflow
|
||
|
|
||
|
|
||
|
def test_arith_coerce_scalar(data, all_arithmetic_operators):
    """Check that an operation with a float scalar gives a ``Float64`` result.

    The expected Series is built by running the same operation on a plain
    float copy of the data and casting the outcome to ``Float64``.
    """
    op = tm.get_op_from_name(all_arithmetic_operators)
    s = pd.Series(data)
    other = 0.01

    result = op(s, other)
    expected = op(s.astype(float), other)
    expected = expected.astype("Float64")

    # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
    if all_arithmetic_operators == "__rmod__":
        mask = (s == 0).fillna(False).to_numpy(bool)
        expected.array._mask[mask] = False

    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
def test_arithmetic_conversion(all_arithmetic_operators, other):
    """Check that ``Int64`` combined with a float operand gives ``Float64``."""
    # if we have a float operand we should have a float result
    # if that is equal to an integer
    op = tm.get_op_from_name(all_arithmetic_operators)

    s = pd.Series([1, 2, 3], dtype="Int64")
    result = op(s, other)
    assert result.dtype == "Float64"
|
||
|
|
||
|
|
||
|
def test_cross_type_arithmetic():
    """Check arithmetic between ``Int64``, ``UInt8`` and plain integer columns.

    Sums are expected to be ``Int64`` with missing values propagated, and a
    comparison on such a sum is expected to be nullable ``boolean``.
    """
    df = pd.DataFrame(
        {
            "A": pd.Series([1, 2, np.nan], dtype="Int64"),
            "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
            "C": [1, 2, 3],
        }
    )

    # nullable + non-nullable integer
    result = df.A + df.C
    expected = pd.Series([2, 4, np.nan], dtype="Int64")
    tm.assert_series_equal(result, expected)

    # comparison on the arithmetic result
    result = (df.A + df.C) * 3 == 12
    expected = pd.Series([False, True, None], dtype="boolean")
    tm.assert_series_equal(result, expected)

    # two nullable integer dtypes of different width/signedness
    result = df.A + df.B
    expected = pd.Series([2, np.nan, np.nan], dtype="Int64")
    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """Check that the reduction ``op`` returns floats for an ``Int64`` column.

    Covers the Series reduction (a Python ``float``) and the groupby
    reduction (a ``Float64`` column).
    """
    # some reduce ops always return float, even if the result
    # is a rounded number
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    result = getattr(df.C, op)()
    assert isinstance(result, float)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
        ([1, 2, None], [-1, -2, None], [1, 2, None]),
        ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
    ],
)
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
    """Check unary ``-``, unary ``+`` and ``abs`` on signed masked integer arrays."""
    dtype = any_signed_int_ea_dtype
    arr = pd.array(source, dtype=dtype)
    neg_result, pos_result, abs_result = -arr, +arr, abs(arr)
    neg_target = pd.array(neg_target, dtype=dtype)
    abs_target = pd.array(abs_target, dtype=dtype)

    tm.assert_extension_array_equal(neg_result, neg_target)
    tm.assert_extension_array_equal(pos_result, arr)
    # unary plus must hand back new memory, not the original buffers
    assert not tm.shares_memory(pos_result, arr)
    tm.assert_extension_array_equal(abs_result, abs_target)
|
||
|
|
||
|
|
||
|
def test_values_multiplying_large_series_by_NA():
    """Check that ``pd.NA * Series`` of 10001 zeros gives an all-``pd.NA`` Series."""
    # GH#33701

    result = pd.NA * pd.Series(np.zeros(10001))
    expected = pd.Series([pd.NA] * 10001)

    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_bitwise(dtype):
    """Check ``|``, ``&`` and ``^`` between masked integer arrays.

    A missing value on either side is expected to give a missing result,
    and combining with a ``Float64`` array is expected to raise TypeError.
    """
    left = pd.array([1, None, 3, 4], dtype=dtype)
    right = pd.array([None, 3, 5, 4], dtype=dtype)

    result = left | right
    expected = pd.array([None, None, 3 | 5, 4 | 4], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)

    result = left & right
    expected = pd.array([None, None, 3 & 5, 4 & 4], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)

    result = left ^ right
    expected = pd.array([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)

    # TODO: desired behavior when operating with boolean? defer?

    floats = right.astype("Float64")
    with pytest.raises(TypeError, match="unsupported operand type"):
        left | floats
    with pytest.raises(TypeError, match="unsupported operand type"):
        left & floats
    with pytest.raises(TypeError, match="unsupported operand type"):
        left ^ floats
|