You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

303 lines
10 KiB

8 months ago
import numpy as np
import pytest
import pandas as pd
from pandas import SparseDtype
import pandas._testing as tm
from pandas.core.arrays.sparse import SparseArray
@pytest.fixture
def arr_data():
    """Return a ten-element float ndarray in which NaN marks the gaps."""
    nan = np.nan
    values = [nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]
    return np.array(values)
@pytest.fixture
def arr(arr_data):
    """Return a SparseArray built from the ``arr_data`` fixture value."""
    sparse = SparseArray(arr_data)
    return sparse
class TestGetitem:
    """Integer, slice and boolean-mask ``__getitem__`` checks for SparseArray."""

    def test_getitem(self, arr):
        """Integer lookups (positive and negated) agree with the dense copy."""
        dense = arr.to_dense()
        for pos, item in enumerate(arr):
            tm.assert_almost_equal(item, dense[pos])
            tm.assert_almost_equal(arr[-pos], dense[-pos])

    def test_getitem_arraylike_mask(self, arr):
        """A plain list of booleans selects the flagged positions."""
        # The fixture value is not used; a small integer array replaces it.
        sparse = SparseArray([0, 1, 2])
        mask = [True, False, True]
        tm.assert_sp_array_equal(sparse[mask], SparseArray([0, 2]))

    @pytest.mark.parametrize(
        "slc",
        [
            np.s_[:],
            np.s_[1:10],
            np.s_[1:100],
            np.s_[10:1],
            np.s_[:-3],
            np.s_[-5:-4],
            np.s_[:-12],
            np.s_[-12:],
            np.s_[2:],
            np.s_[2::3],
            np.s_[::2],
            np.s_[::-1],
            np.s_[::-2],
            np.s_[1:6:2],
            np.s_[:-6:-2],
        ],
    )
    @pytest.mark.parametrize(
        "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
    )
    def test_getslice(self, slc, as_dense):
        """Slicing the sparse array equals sparsifying the sliced dense data."""
        dense = np.array(as_dense)
        sliced = SparseArray(dense)[slc]
        tm.assert_sp_array_equal(sliced, SparseArray(dense[slc]))

    def test_getslice_tuple(self):
        """A one-element tuple key works; a two-axis key raises IndexError."""
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
        key = (slice(4, None),)

        # Default fill value.
        sparse = SparseArray(dense)
        tm.assert_sp_array_equal(sparse[key], SparseArray(dense[4:]))

        # Explicit fill_value=0.
        sparse = SparseArray(dense, fill_value=0)
        tm.assert_sp_array_equal(sparse[key], SparseArray(dense[4:], fill_value=0))

        msg = "too many indices for array"
        with pytest.raises(IndexError, match=msg):
            sparse[4:, :]

        with pytest.raises(IndexError, match=msg):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        """An all-False mask yields a result with the source dtype."""
        source = SparseArray([0, 1, 2])
        selected = source[[False, False, False]]
        assert selected.dtype == source.dtype

    def test_getitem_bool_sparse_array(self, arr):
        """A boolean SparseArray can be used as the indexing mask."""
        # GH 23122
        mask = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        expected = SparseArray([np.nan, 2, np.nan, 5, 6])
        tm.assert_sp_array_equal(arr[mask], expected)

        # Inverted mask picks the complementary positions.
        inverted = ~mask
        expected = SparseArray([np.nan, 1, 3, 4, np.nan])
        tm.assert_sp_array_equal(arr[inverted], expected)

        # Mask built from data containing NaN, with a NaN fill value.
        nan_mask = SparseArray(
            [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan
        )
        expected = SparseArray([np.nan, 3, 5])
        tm.assert_sp_array_equal(arr[nan_mask], expected)

    def test_getitem_bool_sparse_array_as_comparison(self):
        """The result of a comparison on the array can index that same array."""
        # GH 45110
        sparse = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
        selected = sparse[sparse > 2]
        expected = SparseArray([3.0, 4.0], fill_value=np.nan)
        tm.assert_sp_array_equal(selected, expected)

    def test_get_item(self, arr):
        """Scalar lookups return stored values; out-of-range positions raise."""
        zero_filled = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

        # NaN-filled fixture array.
        assert np.isnan(arr[1])
        assert arr[2] == 1
        assert arr[7] == 5

        # Zero-filled counterpart.
        assert zero_filled[0] == 0
        assert zero_filled[2] == 1
        assert zero_filled[7] == 5

        errmsg = "must be an integer between -10 and 10"
        for out_of_range in (11, -11):
            with pytest.raises(IndexError, match=errmsg):
                arr[out_of_range]

        last = len(arr) - 1
        assert arr[-1] == arr[last]
class TestSetitem:
    """Item-assignment checks for SparseArray."""

    def test_set_item(self, arr_data):
        """Scalar and slice assignment are both expected to raise TypeError."""
        arr = SparseArray(arr_data).copy()
        expected_msg = "assignment via setitem"

        # Single position.
        with pytest.raises(TypeError, match=expected_msg):
            arr[5] = 3

        # Slice.
        with pytest.raises(TypeError, match=expected_msg):
            arr[1:5] = 2
class TestTake:
    """``take`` checks for SparseArray, with and without ``allow_fill``."""

    def test_take_scalar_raises(self, arr):
        """Passing a scalar instead of an array of indices raises ValueError."""
        msg = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=msg):
            arr.take(2)

    def test_take(self, arr_data, arr):
        """Sparse ``take`` agrees with ``np.take`` on the dense data."""
        for indices in ([2, 3], [0, 1, 2]):
            expected = SparseArray(np.take(arr_data, indices))
            tm.assert_sp_array_equal(arr.take(indices), expected)

    def test_take_all_empty(self):
        """Taking every position of an all-zero int array returns an equal array."""
        zeros = pd.array([0, 0], dtype=SparseDtype("int64"))
        taken = zeros.take([0, 1], allow_fill=True, fill_value=np.nan)
        tm.assert_sp_array_equal(zeros, taken)

    def test_take_different_fill_value(self):
        """The result keeps the source dtype when ``fill_value`` differs."""
        # Take with a different fill value shouldn't overwrite the original
        source_dtype = SparseDtype("float64", fill_value=0.0)
        sparse = pd.array([0.0], dtype=source_dtype)
        taken = sparse.take([0, -1], allow_fill=True, fill_value=np.nan)
        expected = pd.array([0, np.nan], dtype=sparse.dtype)
        tm.assert_sp_array_equal(expected, taken)

    def test_take_fill_value(self):
        """``take`` on a zero-filled array agrees with ``np.take``."""
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)
        for indices in ([0], [1, 3, 4]):
            expected = SparseArray(np.take(data, indices), fill_value=0)
            tm.assert_sp_array_equal(sparse.take(indices), expected)

    def test_take_negative(self, arr_data, arr):
        """Negative indices agree with ``np.take`` on the dense data."""
        for indices in ([-1], [-4, -3, -2]):
            expected = SparseArray(np.take(arr_data, indices))
            tm.assert_sp_array_equal(arr.take(indices), expected)

    def test_bad_take(self, arr):
        """An index past the end raises IndexError."""
        with pytest.raises(IndexError, match="bounds"):
            arr.take([11])

    def test_take_filling(self):
        """``allow_fill`` handling on an array with the default fill value."""
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])

        taken = sparse.take(np.array([1, 0, -1]))
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

        # TODO: actionable?
        # XXX: test change: fill_value=True -> allow_fill=True
        taken = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, np.nan]))

        # allow_fill=False
        taken = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

        # With allow_fill=True these negative indices are rejected.
        msg = "Invalid value in 'indices'"
        for bad in ([1, 0, -2], [1, 0, -5]):
            with pytest.raises(ValueError, match=msg):
                sparse.take(np.array(bad), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {"allow_fill": True}),
        )
        for bad, kwargs in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                sparse.take(np.array(bad), **kwargs)

    def test_take_filling_fill_value(self):
        """``allow_fill`` handling on an array created with ``fill_value=0``."""
        # same tests as GH#12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

        taken = sparse.take(np.array([1, 0, -1]))
        tm.assert_sp_array_equal(taken, SparseArray([0, np.nan, 4], fill_value=0))

        # fill_value
        taken = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        # TODO: actionable?
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(taken, expected)

        # allow_fill=False
        taken = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([0, np.nan, 4], fill_value=0))

        # With allow_fill=True these negative indices are rejected.
        msg = "Invalid value in 'indices'."
        for bad in ([1, 0, -2], [1, 0, -5]):
            with pytest.raises(ValueError, match=msg):
                sparse.take(np.array(bad), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {"fill_value": True}),
        )
        for bad, kwargs in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                sparse.take(np.array(bad), **kwargs)

    @pytest.mark.parametrize("kind", ["block", "integer"])
    def test_take_filling_all_nan(self, kind):
        """``take`` on an all-NaN array, for the parametrized ``kind`` values."""
        sparse = SparseArray([np.nan] * 5, kind=kind)

        # Same expectation with and without an explicit fill_value.
        for kwargs in ({}, {"fill_value": True}):
            taken = sparse.take(np.array([1, 0, -1]), **kwargs)
            expected = SparseArray([np.nan] * 3, kind=kind)
            tm.assert_sp_array_equal(taken, expected)

        msg = "out of bounds value in 'indices'"
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {"fill_value": True}),
        )
        for bad, kwargs in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                sparse.take(np.array(bad), **kwargs)
class TestWhere:
    """``where`` checks for sparse data."""

    def test_where_retain_fill_value(self):
        """``_where`` and ``Series.where`` both give a result with fill_value 0."""
        # GH#45691 don't lose fill_value on _where
        sparse = SparseArray([np.nan, 1.0], fill_value=0)
        mask = np.array([True, False])
        expected = SparseArray([1, 1.0], fill_value=0)

        # Array-level call.
        replaced = sparse._where(~mask, 1)
        tm.assert_sp_array_equal(replaced, expected)

        # Same operation through a Series wrapper.
        replaced = pd.Series(sparse).where(~mask, 1)
        tm.assert_series_equal(replaced, pd.Series(expected))