You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
4.8 KiB
182 lines
4.8 KiB
from io import StringIO
|
|
from string import ascii_uppercase
|
|
import textwrap
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from pandas.compat import PYPY
|
|
|
|
from pandas import (
|
|
CategoricalIndex,
|
|
MultiIndex,
|
|
Series,
|
|
date_range,
|
|
)
|
|
|
|
|
|
def test_info_categorical_column_just_works():
    """Smoke test: ``Series.info`` runs on a categorical Series.

    Nothing is asserted about the rendered text; the calls only have to
    complete without raising, both on the full Series and on a
    boolean-mask selection of it.
    """
    size = 2500
    letters = np.array(list("abcdefghij"))
    positions = np.random.default_rng(2).integers(0, 10, size=size, dtype=int)
    cat_ser = Series(letters.take(positions)).astype("category")
    # Result is intentionally discarded; the call just has to succeed.
    cat_ser.isna()
    full_out = StringIO()
    cat_ser.info(buf=full_out)

    # Keep only the rows equal to "d" and render again.
    only_d = cat_ser[cat_ser == "d"]
    subset_out = StringIO()
    only_d.info(buf=subset_out)
|
|
|
|
|
|
def test_info_categorical():
    """``Series.info`` must run for a Series indexed by a CategoricalIndex."""
    # GH14298
    ser = Series(np.zeros(2), index=CategoricalIndex(["a", "b"]))
    out = StringIO()
    # Only successful completion is checked; the output is not inspected.
    ser.info(buf=out)
|
|
|
|
|
|
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
    """Compare the complete ``Series.info`` text for a MultiIndex-ed Series.

    The class line, index summary, dtype summary and memory line are
    expected for both values of ``verbose``; the per-column table (series
    name, header, separator and count/dtype row) is expected only when
    ``verbose`` is true.

    ``lexsorted_two_level_string_multiindex`` is a fixture defined outside
    this file; the expected text assumes it has 10 entries running from
    ('foo', 'one') to ('qux', 'three').
    """
    index = lexsorted_two_level_string_multiindex
    ser = Series(range(len(index)), index=index, name="sth")
    buf = StringIO()
    ser.info(verbose=verbose, buf=buf)
    result = buf.getvalue()

    expected = textwrap.dedent(
        """\
        <class 'pandas.core.series.Series'>
        MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
        """
    )
    if verbose:
        # Column padding is significant: ``info`` pads the count column to
        # the width of its "Non-Null Count" header plus a two-space gap.
        expected += textwrap.dedent(
            """\
            Series name: sth
            Non-Null Count  Dtype
            --------------  -----
            10 non-null     int64
            """
        )
    # The trailing "+" marks the reported memory usage as a lower bound.
    expected += textwrap.dedent(
        f"""\
        dtypes: int64(1)
        memory usage: {ser.memory_usage()}.0+ bytes
        """
    )
    assert result == expected
|
|
|
|
|
|
def test_info_memory():
    """Check the exact ``Series.info`` text for a small int64 Series.

    The memory line is built from ``Series.memory_usage()`` converted to
    float, so the expected text follows whatever size the running pandas
    reports instead of hard-coding a byte count.
    """
    s = Series([1, 2], dtype="i8")
    buf = StringIO()
    s.info(buf=buf)
    result = buf.getvalue()
    memory_bytes = float(s.memory_usage())
    # Column padding is significant: ``info`` pads the count column to the
    # width of its "Non-Null Count" header plus a two-space gap.
    expected = textwrap.dedent(
        f"""\
        <class 'pandas.core.series.Series'>
        RangeIndex: 2 entries, 0 to 1
        Series name: None
        Non-Null Count  Dtype
        --------------  -----
        2 non-null      int64
        dtypes: int64(1)
        memory usage: {memory_bytes} bytes
        """
    )
    assert result == expected
|
|
|
|
|
|
def test_info_wide():
    """Passing ``max_cols`` to ``Series.info`` must raise ``ValueError``."""
    values = np.random.default_rng(2).standard_normal(101)
    ser = Series(values)
    # ``match`` treats this text as a regular expression searched in the error.
    expected_msg = (
        "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
    )
    with pytest.raises(ValueError, match=expected_msg):
        ser.info(max_cols=1)
|
|
|
|
|
|
def test_info_shows_dtypes():
    """Check that ``Series.info`` lists the non-null count beside each dtype.

    For every dtype below, a 10-element Series is built from random 0/1
    integers cast to that dtype, and the rendered info text must contain
    "10 non-null" followed by the dtype name.
    """
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    n = 10
    for dtype in dtypes:
        s = Series(np.random.default_rng(2).integers(2, size=n).astype(dtype))
        buf = StringIO()
        s.info(buf=buf)
        # Collapse every run of whitespace to one space so the check does not
        # depend on how wide ``info`` pads the count column.
        res = " ".join(buf.getvalue().split())
        name = f"{n:d} non-null {dtype}"
        assert name in res
|
|
|
|
|
|
@pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """``memory_usage(deep=True)`` must report more than the shallow figure.

    Checked once with the index included on a Series with a string label,
    and once on a Series holding a list value. Marked as an expected
    failure when ``PYPY`` is true.
    """
    labelled = Series({"a": [1]}, index=["foo"])
    deep_with_index = labelled.memory_usage(index=True, deep=True)
    shallow_with_index = labelled.memory_usage(index=True)
    assert deep_with_index > shallow_with_index

    list_valued = Series({"a": ["a"]})
    deep_default = list_valued.memory_usage(deep=True)
    shallow_default = list_valued.memory_usage()
    assert deep_default > shallow_default
|
|
|
|
|
|
@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """``memory_usage(deep=True)`` must equal the shallow figure.

    Checked once with the index included on a Series with a string label,
    and once on a Series holding a list value. Marked as an expected
    failure whenever ``PYPY`` is false.
    """
    labelled = Series({"a": [1]}, index=["foo"])
    deep_with_index = labelled.memory_usage(index=True, deep=True)
    shallow_with_index = labelled.memory_usage(index=True)
    assert deep_with_index == shallow_with_index

    list_valued = Series({"a": ["a"]})
    deep_default = list_valued.memory_usage(deep=True)
    shallow_default = list_valued.memory_usage()
    assert deep_default == shallow_default
|
|
|
|
|
|
@pytest.mark.parametrize(
    "series, plus",
    [
        (Series(1, index=[1, 2, 3]), False),
        (Series(1, index=list("ABC")), True),
        (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
        (
            Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
            True,
        ),
    ],
)
def test_info_memory_usage_qualified(series, plus):
    """Check whether the rendered info text contains a "+" character.

    ``plus`` says whether a "+" is expected anywhere in the output of
    ``series.info``; the parametrized cases expect it for the string-labelled
    indexes and not for the integer-labelled ones.
    """
    out = StringIO()
    series.info(buf=out)
    has_plus = "+" in out.getvalue()
    assert has_plus if plus else not has_plus
|
|
|
|
|
|
def test_info_memory_usage_bug_on_multiindex():
    """Compare deep memory usage of a MultiIndex-ed Series and its unstacked form.

    The Series is indexed by the product of the uppercase letters ("id") and
    100 consecutive dates ("date"); unstacking on "id" must keep the same
    number of value bytes while reporting a smaller deep memory total.
    """
    # GH 14308
    # memory usage introspection should not materialize .values
    n_dates = 100
    n_ids = len(ascii_uppercase)
    levels = [list(ascii_uppercase), date_range("20160101", periods=n_dates)]
    mi = MultiIndex.from_product(levels, names=["id", "date"])
    values = np.random.default_rng(2).standard_normal(n_dates * n_ids)
    stacked = Series(values, index=mi)

    wide = stacked.unstack("id")
    assert stacked.values.nbytes == wide.values.nbytes
    assert stacked.memory_usage(deep=True) > wide.memory_usage(deep=True).sum()

    # high upper bound
    # NOTE: given the assertion above this difference is negative, so the
    # bound below is a very loose check.
    gap = wide.memory_usage(deep=True).sum() - stacked.memory_usage(deep=True)
    assert gap < 2000
|