# Tests for Series.unstack.
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm


def test_unstack_preserves_object():
    """Unstacking an object-dtype Series keeps every column object-dtype.

    Checked both for the default level and for ``level=0``.
    """
    mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])

    # The values are floats, but the Series is explicitly stored as object.
    ser = Series(np.arange(4.0), index=mi, dtype=object)

    res1 = ser.unstack()
    assert (res1.dtypes == object).all()

    res2 = ser.unstack(level=0)
    assert (res2.dtypes == object).all()


def test_unstack():
    """Exercise ``Series.unstack`` on two- and three-level MultiIndexes.

    Covers four scenarios in sequence: a two-level index where some label
    pairs are absent, a three-level index unstacked on its outermost level,
    and two cases (GH5873) where an index level contains NaN.
    """
    # Two-level index; not every (outer, inner) label pair is present.
    index = MultiIndex(
        levels=[["bar", "foo"], ["one", "three", "two"]],
        codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
    )

    s = Series(np.arange(4.0), index=index)
    unstacked = s.unstack()

    # Absent pairs are expected to show up as NaN.
    expected = DataFrame(
        [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
        index=["bar", "foo"],
        columns=["one", "three", "two"],
    )

    tm.assert_frame_equal(unstacked, expected)

    # Unstacking the outer level instead is compared against the transpose.
    unstacked = s.unstack(level=0)
    tm.assert_frame_equal(unstacked, expected.T)

    # Three-level index whose outermost level has the single label "bar".
    index = MultiIndex(
        levels=[["bar"], ["one", "two", "three"], [0, 1]],
        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    s = Series(np.random.default_rng(2).standard_normal(6), index=index)
    exp_index = MultiIndex(
        levels=[["one", "two", "three"], [0, 1]],
        codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
    unstacked = s.unstack(0).sort_index()
    tm.assert_frame_equal(unstacked, expected)

    # GH5873
    # NaN appears among the labels of the level that becomes the columns.
    idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
    ts = Series([1, 2], index=idx)
    left = ts.unstack()
    right = DataFrame(
        [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
    )
    tm.assert_frame_equal(left, right)

    # NaN appears among the labels of a level that stays in the row index.
    idx = MultiIndex.from_arrays(
        [
            ["cat", "cat", "cat", "dog", "dog"],
            ["a", "a", "b", "a", "b"],
            [1, 2, 1, 1, np.nan],
        ]
    )
    ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
    right = DataFrame(
        [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
        columns=["cat", "dog"],
    )
    tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
    right.index = MultiIndex.from_tuples(tpls)
    tm.assert_frame_equal(ts.unstack(level=0), right)


def test_unstack_tuplename_in_multiindex():
    """A level whose name is a tuple can be unstacked by passing that tuple."""
    # GH 19966
    idx = MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
    )
    ser = Series(1, index=idx)
    result = ser.unstack(("A", "a"))

    # The expected columns are built as a one-level MultiIndex that carries
    # the tuple name; the remaining level becomes a plain Index.
    expected = DataFrame(
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
        index=Index([1, 2, 3], name=("B", "b")),
    )
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        # Unstack only the tuple-named level.
        (
            ("A", "a"),
            [[1, 1], [1, 1], [1, 1], [1, 1]],
            MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]),
            MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
        ),
        # Unstack the tuple-named level together with the string-named "B".
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1], [1, 1, 1, 1]],
            Index([3, 4], name="C"),
            MultiIndex.from_tuples(
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    unstack_idx, expected_values, expected_index, expected_columns
):
    """Unstack a MultiIndex whose level names mix a tuple with strings.

    Parameters
    ----------
    unstack_idx
        Level specification passed to ``Series.unstack``.
    expected_values
        Row-wise values of the expected frame.
    expected_index
        Row index of the expected frame.
    expected_columns
        Column index of the expected frame.
    """
    # GH 19966
    idx = MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
    )
    ser = Series(1, index=idx)
    result = ser.unstack(unstack_idx)

    expected = DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)


def test_unstack_multi_index_categorical_values():
|
|
df = DataFrame(
|
|
np.random.default_rng(2).standard_normal((10, 4)),
|
|
columns=Index(list("ABCD"), dtype=object),
|
|
index=date_range("2000-01-01", periods=10, freq="B"),
|
|
)
|
|
mi = df.stack(future_stack=True).index.rename(["major", "minor"])
|
|
ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")
|
|
|
|
result = ser.unstack()
|
|
|
|
dti = ser.index.levels[0]
|
|
c = pd.Categorical(["foo"] * len(dti))
|
|
expected = DataFrame(
|
|
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
|
|
columns=Index(list("ABCD"), name="minor"),
|
|
index=dti.rename("major"),
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_unstack_mixed_level_names():
    """Unstack by a string name when the level names mix strings and an int."""
    # GH#48763
    arrays = [["a", "a"], [1, 2], ["red", "blue"]]
    # The middle level is named with the integer 0, the others with strings.
    idx = MultiIndex.from_arrays(arrays, names=("x", 0, "y"))
    ser = Series([1, 2], index=idx)
    result = ser.unstack("x")
    expected = DataFrame(
        [[1], [2]],
        columns=Index(["a"], name="x"),
        index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]),
    )
    tm.assert_frame_equal(result, expected)