You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2593 lines
96 KiB

""" Test cases for DataFrame.plot """
from datetime import (
date,
datetime,
)
import gc
import itertools
import re
import string
import weakref
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas.core.dtypes.api import is_list_like
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
PeriodIndex,
Series,
bdate_range,
date_range,
option_context,
plotting,
)
import pandas._testing as tm
from pandas.tests.plotting.common import (
_check_ax_scales,
_check_axes_shape,
_check_box_return_type,
_check_colors,
_check_data,
_check_grid_settings,
_check_has_errorbars,
_check_legend_labels,
_check_plot_works,
_check_text_labels,
_check_ticks_props,
_check_visible,
get_y_axis,
)
from pandas.io.formats.printing import pprint_thing
mpl = pytest.importorskip("matplotlib")
plt = pytest.importorskip("matplotlib.pyplot")
class TestDataFramePlots:
@pytest.mark.slow
def test_plot(self):
    """Smoke-test the default plot of a business-day indexed frame, grid off."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    _check_plot_works(frame.plot, grid=False)
@pytest.mark.slow
def test_plot_subplots(self):
    """Plot with ``subplots=True`` and expect four axes laid out as (4, 1)."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    # _check_plot_works adds an ax so use default_axes=True to avoid warning
    plot_kwargs = {"default_axes": True, "subplots": True}
    axes = _check_plot_works(frame.plot, **plot_kwargs)
    _check_axes_shape(axes, axes_num=4, layout=(4, 1))
@pytest.mark.slow
def test_plot_subplots_negative_layout(self):
    """Pass ``layout=(-1, 2)`` for four columns and expect a (2, 2) layout."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    plot_kwargs = {"default_axes": True, "subplots": True, "layout": (-1, 2)}
    axes = _check_plot_works(frame.plot, **plot_kwargs)
    _check_axes_shape(axes, axes_num=4, layout=(2, 2))
@pytest.mark.slow
def test_plot_subplots_use_index(self):
    """Plot subplots with ``use_index=False``; check tick rotation and layout."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    plot_kwargs = {"default_axes": True, "subplots": True, "use_index": False}
    axes = _check_plot_works(frame.plot, **plot_kwargs)
    _check_ticks_props(axes, xrot=0)
    _check_axes_shape(axes, axes_num=4, layout=(4, 1))
@pytest.mark.xfail(reason="Api changed in 3.6.0")
@pytest.mark.slow
def test_plot_invalid_arg(self):
    """Expect an AttributeError when an unknown keyword reaches ``plot.line``."""
    frame = DataFrame({"x": [1, 2], "y": [3, 4]})
    expected_msg = "'Line2D' object has no property 'blarg'"
    with pytest.raises(AttributeError, match=expected_msg):
        frame.plot.line(blarg=True)
@pytest.mark.slow
def test_plot_tick_props(self):
    """Plot a letter-indexed frame with ``use_index=True``; x ticks unrotated."""
    letters = list(string.ascii_letters[:10])
    frame = DataFrame(np.random.default_rng(2).random((10, 3)), index=letters)
    ax = _check_plot_works(frame.plot, use_index=True)
    _check_ticks_props(ax, xrot=0)
@pytest.mark.slow
@pytest.mark.parametrize(
    "kwargs",
    [
        {"yticks": [1, 5, 10]},
        {"xticks": [1, 5, 10]},
        {"ylim": (-100, 100), "xlim": (-100, 100)},
        {"default_axes": True, "subplots": True, "title": "blah"},
    ],
)
def test_plot_other_args(self, kwargs):
    """Smoke-test ``DataFrame.plot`` with each parametrized keyword set."""
    letters = list(string.ascii_letters[:10])
    frame = DataFrame(np.random.default_rng(2).random((10, 3)), index=letters)
    _check_plot_works(frame.plot, **kwargs)
@pytest.mark.slow
def test_plot_visible_ax(self):
    """Check x-axis label visibility across three stacked subplots."""
    letters = list(string.ascii_letters[:10])
    frame = DataFrame(np.random.default_rng(2).random((10, 3)), index=letters)
    # We have to redo it here because _check_plot_works does two plots,
    # once without an ax kwarg and once with an ax kwarg and the new sharex
    # behaviour does not remove the visibility of the latter axis (as ax is
    # present). see: https://github.com/pandas-dev/pandas/issues/9737
    axes = frame.plot(subplots=True, title="blah")
    _check_axes_shape(axes, axes_num=3, layout=(3, 1))

    # The first two axes keep the axis itself but hide labels and tick labels.
    for upper_ax in axes[:2]:
        _check_visible(upper_ax.xaxis)  # xaxis must be visible for grid
        _check_visible(upper_ax.get_xticklabels(), visible=False)
        _check_visible(upper_ax.get_xticklabels(minor=True), visible=False)
        _check_visible([upper_ax.xaxis.get_label()], visible=False)

    # The third axes shows everything.
    bottom_ax = axes[2]
    _check_visible(bottom_ax.xaxis)
    _check_visible(bottom_ax.get_xticklabels())
    _check_visible([bottom_ax.xaxis.get_label()])
    _check_ticks_props(bottom_ax, xrot=0)
@pytest.mark.slow
def test_plot_title(self):
    """Smoke-test ``DataFrame.plot`` with a ``title`` keyword."""
    letters = list(string.ascii_letters[:10])
    frame = DataFrame(np.random.default_rng(2).random((10, 3)), index=letters)
    _check_plot_works(frame.plot, title="blah")
@pytest.mark.slow
def test_plot_multiindex(self):
    """Plot a frame indexed by a (letter, int) MultiIndex; x ticks unrotated."""
    pairs = zip(string.ascii_letters[:10], range(10))
    values = np.random.default_rng(2).random((10, 3))
    frame = DataFrame(values, index=MultiIndex.from_tuples(pairs))
    ax = _check_plot_works(frame.plot, use_index=True)
    _check_ticks_props(ax, xrot=0)
@pytest.mark.slow
def test_plot_multiindex_unicode(self):
    """Plot a frame whose row/column MultiIndex labels and title are non-ASCII."""
    # unicode
    row_tuples = [
        ("\u03b1", 0),
        ("\u03b1", 1),
        ("\u03b2", 2),
        ("\u03b2", 3),
        ("\u03b3", 4),
        ("\u03b3", 5),
        ("\u03b4", 6),
        ("\u03b4", 7),
    ]
    row_index = MultiIndex.from_tuples(row_tuples, names=["i0", "i1"])
    col_index = MultiIndex.from_tuples(
        [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"]
    )
    values = np.random.default_rng(2).integers(0, 10, (8, 2))
    frame = DataFrame(values, columns=col_index, index=row_index)
    _check_plot_works(frame.plot, title="\u03A3")
@pytest.mark.slow
@pytest.mark.parametrize("layout", [None, (-1, 1)])
def test_plot_single_column_bar(self, layout):
    """Bar-plot a single-column frame as subplots; expect a (1, 1) layout."""
    # GH 6951
    # Test with single column
    frame = DataFrame({"x": np.random.default_rng(2).random(10)})
    plot_kwargs = {"subplots": True, "layout": layout}
    axes = _check_plot_works(frame.plot.bar, **plot_kwargs)
    _check_axes_shape(axes, axes_num=1, layout=(1, 1))
@pytest.mark.slow
def test_plot_passed_ax(self):
    """A supplied ``ax`` is reused when exactly one subplot is required."""
    # When ax is supplied and required number of axes is 1,
    # passed ax should be used:
    frame = DataFrame({"x": np.random.default_rng(2).random(10)})
    _, supplied_ax = mpl.pyplot.subplots()
    returned_axes = frame.plot.bar(subplots=True, ax=supplied_ax)
    assert len(returned_axes) == 1
    assert supplied_ax.axes is returned_axes[0]
@pytest.mark.parametrize(
    "cols, x, y",
    [
        [list("ABCDE"), "A", "B"],
        [["A", "B"], "A", "B"],
        [["C", "A"], "C", "A"],
        [["A", "C"], "A", "C"],
        [["B", "C"], "B", "C"],
        [["A", "D"], "A", "D"],
        [["A", "E"], "A", "E"],
    ],
)
def test_nullable_int_plot(self, cols, x, y):
    """Plot column pairs drawn from int, object-with-NaN and i8-view columns."""
    # GH 32073
    years = ["2008", "2009", None, "2011", "2012"]
    data = {
        "A": [1, 2, 3, 4, 5],
        "B": [1, 2, 3, 4, 5],
        "C": np.array([7, 5, np.nan, 3, 2], dtype=object),
        "D": pd.to_datetime(years, format="%Y").view("i8"),
        "E": pd.to_datetime(years, format="%Y", utc=True).view("i8"),
    }
    frame = DataFrame(data)
    subset = frame[cols]
    _check_plot_works(subset.plot, x=x, y=y)
@pytest.mark.slow
@pytest.mark.parametrize("plot", ["line", "bar", "hist", "pie"])
def test_integer_array_plot_series(self, plot):
    """Smoke-test each Series plot kind on a ``UInt32`` extension array."""
    # GH 25587
    values = pd.array([1, 2, 3, 4], dtype="UInt32")
    ser = Series(values)
    plot_method = getattr(ser.plot, plot)
    _check_plot_works(plot_method)
@pytest.mark.slow
@pytest.mark.parametrize(
    "plot, kwargs",
    [
        ["line", {}],
        ["bar", {}],
        ["hist", {}],
        ["pie", {"y": "y"}],
        ["scatter", {"x": "x", "y": "y"}],
        ["hexbin", {"x": "x", "y": "y"}],
    ],
)
def test_integer_array_plot_df(self, plot, kwargs):
    """Smoke-test each DataFrame plot kind on ``UInt32`` extension columns."""
    # GH 25587
    values = pd.array([1, 2, 3, 4], dtype="UInt32")
    frame = DataFrame({"x": values, "y": values})
    plot_method = getattr(frame.plot, plot)
    _check_plot_works(plot_method, **kwargs)
def test_nonnumeric_exclude(self):
    """Only the numeric column of a mixed frame produces a line."""
    frame = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]})
    plotted_lines = frame.plot().get_lines()
    assert len(plotted_lines) == 1  # B was plotted
def test_implicit_label(self):
    """The x-axis label is taken from the column passed as ``x``."""
    values = np.random.default_rng(2).standard_normal((10, 3))
    frame = DataFrame(values, columns=["a", "b", "c"])
    ax = frame.plot(x="a", y="b")
    x_label = ax.xaxis.get_label()
    _check_text_labels(x_label, "a")
def test_donot_overwrite_index_name(self):
    """Plotting with an explicit ``label`` leaves ``df.index.name`` unchanged."""
    # GH 8494
    values = np.random.default_rng(2).standard_normal((2, 2))
    frame = DataFrame(values, columns=["a", "b"])
    frame.index.name = "NAME"
    frame.plot(y="b", label="LABEL")
    assert frame.index.name == "NAME"
def test_plot_xy(self):
    """Compare ``x``/``y`` selections against the equivalent set_index plots."""
    # columns.inferred_type == 'string'
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((5, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=5, freq="B"),
    )

    # Integer selectors.
    result = frame.plot(x=0, y=1)
    expected = frame.set_index("A")["B"].plot()
    _check_data(result, expected)

    result = frame.plot(x=0)
    expected = frame.set_index("A").plot()
    _check_data(result, expected)

    result = frame.plot(y=0)
    expected = frame.B.plot()
    _check_data(result, expected)

    # Label selectors.
    result = frame.plot(x="A", y="B")
    expected = frame.set_index("A").B.plot()
    _check_data(result, expected)

    result = frame.plot(x="A")
    expected = frame.set_index("A").plot()
    _check_data(result, expected)

    result = frame.plot(y="B")
    expected = frame.B.plot()
    _check_data(result, expected)
def test_plot_xy_int_cols(self):
    """Compare ``x``/``y`` selections when the column labels are integers."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((5, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=5, freq="B"),
    )
    # columns.inferred_type == 'integer'
    frame.columns = np.arange(1, len(frame.columns) + 1)

    result = frame.plot(x=1, y=2)
    expected = frame.set_index(1)[2].plot()
    _check_data(result, expected)

    result = frame.plot(x=1)
    expected = frame.set_index(1).plot()
    _check_data(result, expected)

    result = frame.plot(y=1)
    expected = frame[1].plot()
    _check_data(result, expected)
def test_plot_xy_figsize_and_title(self):
    """``title`` and ``figsize`` keywords are reflected on the returned axes."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((5, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=5, freq="B"),
    )
    # figsize and title
    plot_kwargs = {"x": 1, "y": 2, "title": "Test", "figsize": (16, 8)}
    ax = frame.plot(**plot_kwargs)
    _check_text_labels(ax.title, "Test")
    _check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16.0, 8.0))
    # columns.inferred_type == 'mixed'
    # TODO add MultiIndex test
@pytest.mark.parametrize(
    "input_log, expected_log", [(True, "log"), ("sym", "symlog")]
)
def test_logscales(self, input_log, expected_log):
    """``logy``, ``logx`` and ``loglog`` set the expected axis scale names."""
    positions = np.arange(100)
    frame = DataFrame({"a": np.arange(100)}, index=positions)

    y_ax = frame.plot(logy=input_log)
    _check_ax_scales(y_ax, yaxis=expected_log)
    assert y_ax.get_yscale() == expected_log

    x_ax = frame.plot(logx=input_log)
    _check_ax_scales(x_ax, xaxis=expected_log)
    assert x_ax.get_xscale() == expected_log

    both_ax = frame.plot(loglog=input_log)
    _check_ax_scales(both_ax, xaxis=expected_log, yaxis=expected_log)
    assert both_ax.get_xscale() == expected_log
    assert both_ax.get_yscale() == expected_log
@pytest.mark.parametrize("input_param", ["logx", "logy", "loglog"])
def test_invalid_logscale(self, input_param):
    """An invalid log keyword value raises; a pie plot warns that it is ignored."""
    # GH: 24867
    frame = DataFrame({"a": np.arange(100)}, index=np.arange(100))

    bad_value_kwargs = {input_param: "sm"}
    msg = f"keyword '{input_param}' should be bool, None, or 'sym', not 'sm'"
    with pytest.raises(ValueError, match=msg):
        frame.plot(**bad_value_kwargs)

    pie_kwargs = {input_param: True}
    msg = f"PiePlot ignores the '{input_param}' keyword"
    with tm.assert_produces_warning(UserWarning, match=msg):
        frame.plot.pie(subplots=True, **pie_kwargs)
def test_xcompat(self):
    """With ``x_compat=True`` the line x-data is not a PeriodIndex; xrot is 30."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    ax = frame.plot(x_compat=True)
    first_line = ax.get_lines()[0]
    assert not isinstance(first_line.get_xdata(), PeriodIndex)
    _check_ticks_props(ax, xrot=30)
def test_xcompat_plot_params(self):
    """Setting ``plot_params["xaxis.compat"]`` applies to a later ``df.plot()``.

    The option is set through item assignment on the module-level
    ``plotting.plot_params`` mapping and is restored afterwards, so the
    setting does not leak into tests that run later in the same process.
    """
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    # plot_params is shared global state: remember the current value and
    # put it back even if an assertion below fails.
    previous = plotting.plot_params["xaxis.compat"]
    plotting.plot_params["xaxis.compat"] = True
    try:
        ax = df.plot()
        lines = ax.get_lines()
        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
        _check_ticks_props(ax, xrot=30)
    finally:
        plotting.plot_params["xaxis.compat"] = previous
def test_xcompat_plot_params_x_compat(self):
    """Plot after assigning ``plot_params["x_compat"] = False``."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    plotting.plot_params["x_compat"] = False
    ax = frame.plot()
    first_line = ax.get_lines()[0]
    assert not isinstance(first_line.get_xdata(), PeriodIndex)
    # Building a PeriodIndex from the x-data is expected to emit the
    # business-day PeriodDtype deprecation warning.
    msg = r"PeriodDtype\[B\] is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert isinstance(PeriodIndex(first_line.get_xdata()), PeriodIndex)
def test_xcompat_plot_params_context_manager(self):
    """``plot_params.use("x_compat", True)`` applies within the ``with`` block."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    # useful if you're plotting a bunch together
    with plotting.plot_params.use("x_compat", True):
        ax = frame.plot()
        first_line = ax.get_lines()[0]
        assert not isinstance(first_line.get_xdata(), PeriodIndex)
        _check_ticks_props(ax, xrot=30)
def test_xcompat_plot_period(self):
    """Default plot of a business-day indexed frame keeps x ticks unrotated."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    ax = frame.plot()
    first_line = ax.get_lines()[0]
    assert not isinstance(first_line.get_xdata(), PeriodIndex)
    # Building a PeriodIndex from the x-data is expected to emit the
    # business-day PeriodDtype deprecation warning.
    msg = r"PeriodDtype\[B\] is deprecated "
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert isinstance(PeriodIndex(first_line.get_xdata()), PeriodIndex)
    _check_ticks_props(ax, xrot=0)
def test_period_compat(self):
    """Draw a horizontal line on the current axes after a bdate-indexed plot."""
    # GH 9012
    # period-array conversions
    business_days = bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31))
    values = np.random.default_rng(2).random((21, 2))
    frame = DataFrame(values, index=business_days, columns=["a", "b"])
    frame.plot()
    mpl.pyplot.axhline(y=0)
@pytest.mark.parametrize("index_dtype", [np.int64, np.float64])
def test_unsorted_index(self, index_dtype):
    """The plotted xy data of a descending-index frame round-trips to ``df.y``."""
    descending = Index(np.arange(99, -1, -1), dtype=index_dtype)
    frame = DataFrame({"y": np.arange(100)}, index=descending, dtype=np.int64)
    ax = frame.plot()
    first_line = ax.get_lines()[0]
    xy = first_line.get_xydata()
    # Column 0 holds x (the index), column 1 holds y (the values).
    result = Series(xy[:, 1], xy[:, 0], dtype=np.int64, name="y")
    tm.assert_series_equal(result, frame.y, check_index_type=False)
@pytest.mark.parametrize(
    "df",
    [
        DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0]),
        DataFrame(
            {"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]},
            index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0],
        ),
    ],
)
def test_unsorted_index_lims(self, df):
    """The x-limits enclose the NaN-ignoring min/max of the plotted x-data."""
    ax = df.plot()
    lower, upper = ax.get_xlim()
    plotted = ax.get_lines()
    assert lower <= np.nanmin(plotted[0].get_data()[0])
    assert upper >= np.nanmax(plotted[0].get_data()[0])
def test_unsorted_index_lims_x_y(self):
    """The x-limits enclose the plotted x-data when ``x`` is an unsorted column."""
    frame = DataFrame(
        {"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]}
    )
    ax = frame.plot(x="z", y="y")
    lower, upper = ax.get_xlim()
    plotted = ax.get_lines()
    assert lower <= np.nanmin(plotted[0].get_data()[0])
    assert upper >= np.nanmax(plotted[0].get_data()[0])
def test_negative_log(self):
    """Area plots raise ValueError for ``logy=True`` and ``loglog=True``."""
    positive = DataFrame(
        np.random.default_rng(2).random((6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["x", "y", "z", "four"],
    )
    negative = -positive
    msg = "Log-y scales are not supported in area plot"
    with pytest.raises(ValueError, match=msg):
        negative.plot.area(logy=True)
    with pytest.raises(ValueError, match=msg):
        negative.plot.area(loglog=True)
def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
base = np.zeros(len(normal_lines[0].get_data()[1]))
for nl, sl in zip(normal_lines, stacked_lines):
base += nl.get_data()[1] # get y coordinates
sy = sl.get_data()[1]
tm.assert_numpy_array_equal(base, sy)
@pytest.mark.parametrize("kind", ["line", "area"])
@pytest.mark.parametrize("mult", [1, -1])
def test_line_area_stacked(self, kind, mult):
    """Stacked lines equal the cumulative sum of the unstacked lines."""
    base = DataFrame(
        np.random.default_rng(2).random((6, 4)), columns=["w", "x", "y", "z"]
    )
    frame = mult * base
    unstacked_ax = _check_plot_works(frame.plot, kind=kind, stacked=False)
    stacked_ax = _check_plot_works(frame.plot, kind=kind, stacked=True)
    self._compare_stacked_y_cood(unstacked_ax.lines, stacked_ax.lines)
@pytest.mark.parametrize("kind", ["line", "area"])
def test_line_area_stacked_sep_df(self, kind):
    """Positive and negative columns are stacked as two separate groups."""
    # each column has either positive or negative value
    columns = {
        "w": np.random.default_rng(2).random(6),
        "x": np.random.default_rng(2).random(6),
        "y": -np.random.default_rng(2).random(6),
        "z": -np.random.default_rng(2).random(6),
    }
    frame = DataFrame(columns)
    unstacked_ax = _check_plot_works(frame.plot, kind=kind, stacked=False)
    stacked_ax = _check_plot_works(frame.plot, kind=kind, stacked=True)
    # First two lines are the positive columns, the rest the negative ones.
    self._compare_stacked_y_cood(unstacked_ax.lines[:2], stacked_ax.lines[:2])
    self._compare_stacked_y_cood(unstacked_ax.lines[2:], stacked_ax.lines[2:])
def test_line_area_stacked_mixed(self):
    """Stacking a column with mixed-sign values raises ValueError."""
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["w", "x", "y", "z"],
    )
    # The unstacked plot works.
    _check_plot_works(frame.plot, stacked=False)

    expected_msg = (
        "When stacked is True, each column must be either all positive or "
        "all negative. Column 'w' contains both positive and negative "
        "values"
    )
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot(stacked=True)
@pytest.mark.parametrize("kind", ["line", "area"])
def test_line_area_stacked_positive_idx(self, kind):
    """Smoke-test a stacked ``logx`` plot on a strictly positive index."""
    base = DataFrame(
        np.random.default_rng(2).random((6, 4)), columns=["w", "x", "y", "z"]
    )
    # Use an index with strictly positive values, preventing
    # matplotlib from warning about ignoring xlim
    shifted = base.set_index(base.index + 1)
    _check_plot_works(shifted.plot, kind=kind, logx=True, stacked=True)
@pytest.mark.parametrize(
    "idx", [range(4), date_range("2023-01-1", freq="D", periods=4)]
)
def test_line_area_nan_df(self, idx):
    """NaN entries are masked in the y-data of an unstacked line plot."""
    col_a = [1, 2, np.nan, 3]
    col_b = [3, np.nan, 2, 1]
    frame = DataFrame({"a": col_a, "b": col_b}, index=idx)
    ax = _check_plot_works(frame.plot)
    y_a = ax.lines[0].get_ydata()
    y_b = ax.lines[1].get_ydata()

    # remove nan for comparison purpose
    expected_a = np.array([1, 2, 3], dtype=np.float64)
    tm.assert_numpy_array_equal(np.delete(y_a.data, 2), expected_a)
    expected_b = np.array([3, 2, 1], dtype=np.float64)
    tm.assert_numpy_array_equal(np.delete(y_b.data, 1), expected_b)

    # The mask is True exactly where the input held NaN.
    tm.assert_numpy_array_equal(y_a.mask, np.array([False, False, True, False]))
    tm.assert_numpy_array_equal(y_b.mask, np.array([False, True, False, False]))
@pytest.mark.parametrize(
    "idx", [range(4), date_range("2023-01-1", freq="D", periods=4)]
)
def test_line_area_nan_df_stacked(self, idx):
    """In a stacked line plot, NaN entries are expected to count as zero."""
    col_a = [1, 2, np.nan, 3]
    col_b = [3, np.nan, 2, 1]
    frame = DataFrame({"a": col_a, "b": col_b}, index=idx)
    filled_a = np.array([1, 2, 0, 3], dtype=np.float64)
    filled_b = np.array([3, 0, 2, 1], dtype=np.float64)

    ax = _check_plot_works(frame.plot, stacked=True)
    bottom_line, top_line = ax.lines[0], ax.lines[1]
    tm.assert_numpy_array_equal(bottom_line.get_ydata(), filled_a)
    tm.assert_numpy_array_equal(top_line.get_ydata(), filled_a + filled_b)
@pytest.mark.parametrize(
    "idx", [range(4), date_range("2023-01-1", freq="D", periods=4)]
)
@pytest.mark.parametrize("kwargs", [{}, {"stacked": False}])
def test_line_area_nan_df_stacked_area(self, idx, kwargs):
    """Area plots are expected to treat NaN as zero, stacked or unstacked."""
    col_a = [1, 2, np.nan, 3]
    col_b = [3, np.nan, 2, 1]
    frame = DataFrame({"a": col_a, "b": col_b}, index=idx)
    filled_a = np.array([1, 2, 0, 3], dtype=np.float64)
    filled_b = np.array([3, 0, 2, 1], dtype=np.float64)

    ax = _check_plot_works(frame.plot.area, **kwargs)
    tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), filled_a)
    # Non-empty kwargs means ``stacked=False`` was passed explicitly;
    # with empty kwargs the second line is expected to hold the sum.
    expected_second = filled_b if kwargs else filled_a + filled_b
    tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected_second)

    ax = _check_plot_works(frame.plot.area, stacked=False)
    tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), filled_a)
    tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), filled_b)
@pytest.mark.parametrize("kwargs", [{}, {"secondary_y": True}])
def test_line_lim(self, kwargs):
    """The x-limits enclose the first and last x-values of the first line."""
    values = np.random.default_rng(2).random((6, 3))
    frame = DataFrame(values, columns=["x", "y", "z"])
    ax = frame.plot(**kwargs)
    lower, upper = ax.get_xlim()
    plotted = ax.get_lines()
    assert lower <= plotted[0].get_data()[0][0]
    assert upper >= plotted[0].get_data()[0][-1]
def test_line_lim_subplots(self):
    """Each ``secondary_y`` subplot has ``left_ax`` and sensible x-limits."""
    values = np.random.default_rng(2).random((6, 3))
    frame = DataFrame(values, columns=["x", "y", "z"])
    axes = frame.plot(secondary_y=True, subplots=True)
    _check_axes_shape(axes, axes_num=3, layout=(3, 1))
    for sub_ax in axes:
        assert hasattr(sub_ax, "left_ax")
        assert not hasattr(sub_ax, "right_ax")
        lower, upper = sub_ax.get_xlim()
        plotted = sub_ax.get_lines()
        assert lower <= plotted[0].get_data()[0][0]
        assert upper >= plotted[0].get_data()[0][-1]
@pytest.mark.xfail(
    strict=False,
    reason="2020-12-01 this has been failing periodically on the "
    "ymin==0 assertion for a week or so.",
)
@pytest.mark.parametrize("stacked", [True, False])
def test_area_lim(self, stacked):
    """Area plots anchor y at 0: ymin for positive data, ymax for negative."""
    positive = DataFrame(
        np.random.default_rng(2).random((6, 4)), columns=["x", "y", "z", "four"]
    )
    negative = -positive

    ax = _check_plot_works(positive.plot.area, stacked=stacked)
    x_low, x_high = ax.get_xlim()
    y_low, _ = ax.get_ylim()
    plotted = ax.get_lines()
    assert x_low <= plotted[0].get_data()[0][0]
    assert x_high >= plotted[0].get_data()[0][-1]
    assert y_low == 0

    ax = _check_plot_works(negative.plot.area, stacked=stacked)
    _, y_high = ax.get_ylim()
    assert y_high == 0
def test_area_sharey_dont_overwrite(self):
    """Area plots on ``sharey=True`` axes leave the shared y-axis joined."""
    # GH37942
    values = np.random.default_rng(2).random((4, 2))
    frame = DataFrame(values, columns=["x", "y"])
    _, (left_ax, right_ax) = mpl.pyplot.subplots(1, 2, sharey=True)

    frame.plot(ax=left_ax, kind="area")
    frame.plot(ax=right_ax, kind="area")

    assert get_y_axis(left_ax).joined(left_ax, right_ax)
    assert get_y_axis(right_ax).joined(left_ax, right_ax)
@pytest.mark.parametrize("stacked", [True, False])
def test_bar_linewidth(self, stacked):
    """Every bar patch carries the requested ``linewidth``."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
    ax = frame.plot.bar(stacked=stacked, linewidth=2)
    for patch in ax.patches:
        assert patch.get_linewidth() == 2
def test_bar_linewidth_subplots(self):
    """Every bar patch in every subplot carries the requested ``linewidth``."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
    # subplots
    axes = frame.plot.bar(linewidth=2, subplots=True)
    _check_axes_shape(axes, axes_num=5, layout=(5, 1))
    for sub_ax in axes:
        for patch in sub_ax.patches:
            assert patch.get_linewidth() == 2
@pytest.mark.parametrize(
    "meth, dim", [("bar", "get_width"), ("barh", "get_height")]
)
@pytest.mark.parametrize("stacked", [True, False])
def test_bar_barwidth(self, meth, dim, stacked):
    """Bar thickness is ``width`` when stacked, else ``width / n_columns``."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
    width = 0.9
    plot_method = getattr(frame.plot, meth)
    ax = plot_method(stacked=stacked, width=width)
    for patch in ax.patches:
        measured = getattr(patch, dim)()
        if stacked:
            assert measured == width
        else:
            assert measured == width / len(frame.columns)
@pytest.mark.parametrize(
    "meth, dim", [("bar", "get_width"), ("barh", "get_height")]
)
def test_barh_barwidth_subplots(self, meth, dim):
    """With subplots, every bar has the full requested ``width``."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
    width = 0.9
    plot_method = getattr(frame.plot, meth)
    axes = plot_method(width=width, subplots=True)
    for sub_ax in axes:
        for patch in sub_ax.patches:
            assert getattr(patch, dim)() == width
def test_bar_bottom_left_bottom(self):
    """``bottom`` sets the y-origin of vertical bars (scalar and per-row list)."""
    frame = DataFrame(np.random.default_rng(2).random((5, 5)))

    ax = frame.plot.bar(stacked=False, bottom=1)
    y_origins = [patch.get_y() for patch in ax.patches]
    assert y_origins == [1] * 25

    ax = frame.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5])
    first_column_origins = [patch.get_y() for patch in ax.patches[:5]]
    assert first_column_origins == [-1, -2, -3, -4, -5]
def test_bar_bottom_left_left(self):
    """``left`` sets the x-origin of horizontal bars (array and per-row list)."""
    frame = DataFrame(np.random.default_rng(2).random((5, 5)))

    ax = frame.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1]))
    x_origins = [patch.get_x() for patch in ax.patches]
    assert x_origins == [1] * 25

    ax = frame.plot.barh(stacked=True, left=[1, 2, 3, 4, 5])
    first_column_origins = [patch.get_x() for patch in ax.patches[:5]]
    assert first_column_origins == [1, 2, 3, 4, 5]
def test_bar_bottom_left_subplots(self):
    """``bottom`` / ``left`` apply to every subplot of bar / barh plots."""
    frame = DataFrame(np.random.default_rng(2).random((5, 5)))

    vertical_axes = frame.plot.bar(subplots=True, bottom=-1)
    for sub_ax in vertical_axes:
        y_origins = [patch.get_y() for patch in sub_ax.patches]
        assert y_origins == [-1] * 5

    left_offsets = np.array([1, 1, 1, 1, 1])
    horizontal_axes = frame.plot.barh(subplots=True, left=left_offsets)
    for sub_ax in horizontal_axes:
        x_origins = [patch.get_x() for patch in sub_ax.patches]
        assert x_origins == [1] * 5
def test_bar_nan(self):
    """A NaN value is expected to produce a bar of height 0."""
    frame = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]})
    ax = frame.plot.bar()
    heights = [patch.get_height() for patch in ax.patches]
    assert heights == [10, 0, 20, 5, 10, 20, 1, 2, 3]
def test_bar_nan_stacked(self):
    """In a stacked bar plot a NaN has height 0 and adds nothing to the stack."""
    frame = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]})
    ax = frame.plot.bar(stacked=True)

    heights = [patch.get_height() for patch in ax.patches]
    assert heights == [10, 0, 20, 5, 10, 20, 1, 2, 3]

    y_origins = [patch.get_y() for patch in ax.patches]
    assert y_origins == [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0]
@pytest.mark.parametrize("idx", [Index, pd.CategoricalIndex])
def test_bar_categorical(self, idx):
    """Check tick locations, x-limits and bar edges for grouped and stacked bars.

    ``idx`` is the index class used to build both the row and the column
    labels of the frame.
    """
    # GH 13019
    df = DataFrame(
        np.random.default_rng(2).standard_normal((6, 5)),
        index=idx(list("ABCDEF")),
        columns=idx(list("abcde")),
    )
    expected_ticks = np.array([0, 1, 2, 3, 4, 5])

    ax = df.plot.bar()
    ticks = ax.xaxis.get_ticklocs()
    tm.assert_numpy_array_equal(ticks, expected_ticks)
    assert ax.get_xlim() == (-0.5, 5.5)
    # check left-edge of bars
    assert ax.patches[0].get_x() == -0.25
    assert ax.patches[-1].get_x() == 5.15

    ax = df.plot.bar(stacked=True)
    # Re-read the tick locations from the stacked axes; reusing the value
    # read from the grouped plot would make this check vacuous.
    ticks = ax.xaxis.get_ticklocs()
    tm.assert_numpy_array_equal(ticks, expected_ticks)
    assert ax.get_xlim() == (-0.5, 5.5)
    assert ax.patches[0].get_x() == -0.25
    assert ax.patches[-1].get_x() == 4.75
@pytest.mark.parametrize("x, y", [("x", "y"), (1, 2)])
def test_plot_scatter(self, x, y):
    """Smoke-test scatter plots selected by column label and by position."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["x", "y", "z", "four"],
    )
    _check_plot_works(frame.plot.scatter, x=x, y=y)
def test_plot_scatter_error(self):
    """Omitting ``x`` or ``y`` from ``plot.scatter`` raises TypeError."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["x", "y", "z", "four"],
    )

    missing_y = re.escape("scatter() missing 1 required positional argument: 'y'")
    with pytest.raises(TypeError, match=missing_y):
        frame.plot.scatter(x="x")

    missing_x = re.escape("scatter() missing 1 required positional argument: 'x'")
    with pytest.raises(TypeError, match=missing_x):
        frame.plot.scatter(y="y")
def test_plot_scatter_shape(self):
    """A scatter plot with ``subplots=True`` yields a single (1, 1) axes."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["x", "y", "z", "four"],
    )
    # GH 6951
    plot_kwargs = {"x": "x", "y": "y", "kind": "scatter", "subplots": True}
    axes = frame.plot(**plot_kwargs)
    _check_axes_shape(axes, axes_num=1, layout=(1, 1))
def test_raise_error_on_datetime_time_data(self):
    """Scatter-plotting a ``datetime.time`` column raises TypeError."""
    # GH 8113, datetime.time type is not supported by matplotlib in scatter
    frame = DataFrame(np.random.default_rng(2).standard_normal(10), columns=["a"])
    hourly = date_range(start="2014-01-01", freq="h", periods=10)
    frame["dtime"] = hourly.time
    expected_msg = "must be a string or a (real )?number, not 'datetime.time'"
    with pytest.raises(TypeError, match=expected_msg):
        frame.plot(kind="scatter", x="dtime", y="a")
@pytest.mark.parametrize("x, y", [("dates", "vals"), (0, 1)])
def test_scatterplot_datetime_data(self, x, y):
    """Smoke-test a scatter plot whose x column holds weekly datetimes."""
    # GH 30391
    weekly = date_range(start=date(2019, 1, 1), periods=12, freq="W")
    samples = np.random.default_rng(2).normal(0, 1, len(weekly))
    frame = DataFrame({"dates": weekly, "vals": samples})
    _check_plot_works(frame.plot.scatter, x=x, y=y)
@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize("x, y", [("a", "b"), (0, 1)])
@pytest.mark.parametrize("b_col", [[2, 3, 4], ["a", "b", "c"]])
def test_scatterplot_object_data(self, b_col, x, y, infer_string):
    """Smoke-test scatter plots on string columns, with and without infer_string."""
    # GH 18755
    with option_context("future.infer_string", infer_string):
        data = {"a": ["A", "B", "C"], "b": b_col}
        frame = DataFrame(data)
        _check_plot_works(frame.plot.scatter, x=x, y=y)
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize(
    "categories",
    (["setosa", "versicolor", "virginica"], ["versicolor", "virginica", "setosa"]),
)
def test_scatterplot_color_by_categorical(self, ordered, categories):
    """Colouring a scatter plot by a categorical column labels the colourbar.

    Checks the colourbar tick positions, its boundaries and that the tick
    labels are exactly the categories, in category order.
    """
    df = DataFrame(
        [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]],
        columns=["length", "width"],
    )
    df["species"] = pd.Categorical(
        ["setosa", "setosa", "virginica", "virginica", "versicolor"],
        ordered=ordered,
        categories=categories,
    )
    ax = df.plot.scatter(x=0, y=1, c="species")
    # Unpacking also asserts that exactly one collection was drawn.
    (colorbar_collection,) = ax.collections
    colorbar = colorbar_collection.colorbar

    expected_ticks = np.array([0.5, 1.5, 2.5])
    result_ticks = colorbar.get_ticks()
    tm.assert_numpy_array_equal(result_ticks, expected_ticks)

    expected_boundaries = np.array([0.0, 1.0, 2.0, 3.0])
    result_boundaries = colorbar._boundaries
    tm.assert_numpy_array_equal(result_boundaries, expected_boundaries)

    expected_yticklabels = list(categories)
    result_yticklabels = [i.get_text() for i in colorbar.ax.get_ymajorticklabels()]
    # Compare whole lists: a zip-based check stops at the shorter sequence
    # and would pass if labels were missing altogether.
    assert result_yticklabels == expected_yticklabels
@pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")])
def test_plot_scatter_with_categorical_data(self, x, y):
    """Smoke-test scatter plots where either axis is a categorical column."""
    # after fixing GH 18755, should be able to plot categorical data
    labels = pd.Categorical(["a", "b", "a", "c"])
    frame = DataFrame({"x": [1, 2, 3, 4], "y": labels})
    _check_plot_works(frame.plot.scatter, x=x, y=y)
@pytest.mark.parametrize("x, y, c", [("x", "y", "z"), (0, 1, 2)])
def test_plot_scatter_with_c(self, x, y, c):
    """Colouring by a column uses the ``Greys`` cmap and labels the colourbar."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.integers(low=0, high=100, size=(6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["x", "y", "z", "four"],
    )
    ax = frame.plot.scatter(x=x, y=y, c=c)
    points = ax.collections[0]
    # default to Greys
    assert points.cmap.name == "Greys"
    assert points.colorbar.ax.get_ylabel() == "z"
def test_plot_scatter_with_c_props(self):
    """Check ``colormap``, ``colorbar=False`` and a solid colour for ``c``."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.integers(low=0, high=100, size=(6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["x", "y", "z", "four"],
    )

    cmap_name = "cubehelix"
    ax = frame.plot.scatter(x="x", y="y", c="z", colormap=cmap_name)
    assert ax.collections[0].cmap.name == cmap_name

    # verify turning off colorbar works
    ax = frame.plot.scatter(x="x", y="y", c="z", colorbar=False)
    assert ax.collections[0].colorbar is None

    # verify that we can still plot a solid color
    ax = frame.plot.scatter(x=0, y=1, c="red")
    assert ax.collections[0].colorbar is None
    _check_colors(ax.collections, facecolors=["r"])
def test_plot_scatter_with_c_array(self):
    """An ndarray passed as ``c`` reaches matplotlib on both colour paths."""
    # Ensure that we can pass an np.array straight through to matplotlib,
    # this functionality was accidentally removed previously.
    # See https://github.com/pandas-dev/pandas/issues/8852 for bug report
    #
    # Exercise colormap path and non-colormap path as they are independent
    #
    frame = DataFrame({"A": [1, 2], "B": [3, 4]})

    red = [1.0, 0.0, 0.0, 1.0]
    green = [0.0, 1.0, 0.0, 1.0]
    rgba_colors = np.array([red, green])
    ax = frame.plot.scatter(x="A", y="B", c=rgba_colors)
    # expect the face colors of the points in the non-colormap path to be
    # identical to the values we supplied, normally we'd be on shaky ground
    # comparing floats for equality but here we expect them to be
    # identical.
    face_colors = ax.collections[0].get_facecolor()
    tm.assert_numpy_array_equal(face_colors, rgba_colors)

    # we don't test the colors of the faces in this next plot because they
    # are dependent on the spring colormap, which may change its colors
    # later.
    scalar_colors = np.array([0.0, 1.0])
    frame.plot.scatter(x="A", y="B", c=scalar_colors, cmap="spring")
def test_plot_scatter_with_s(self):
    """A column name passed as ``s`` supplies the marker sizes."""
    # this refers to GH 32904
    values = np.random.default_rng(2).random((10, 3)) * 100
    frame = DataFrame(values, columns=["a", "b", "c"])
    ax = frame.plot.scatter(x="a", y="b", s="c")
    marker_sizes = ax.collections[0].get_sizes()
    tm.assert_numpy_array_equal(frame["c"].values, right=marker_sizes)
def test_plot_scatter_with_norm(self):
    """A ``norm`` passed to ``plot.scatter`` is the one the collection uses."""
    # added while fixing GH 45809
    values = np.random.default_rng(2).random((10, 3)) * 100
    frame = DataFrame(values, columns=["a", "b", "c"])
    log_norm = mpl.colors.LogNorm()
    ax = frame.plot.scatter(x="a", y="b", c="c", norm=log_norm)
    assert ax.collections[0].norm is log_norm
def test_plot_scatter_without_norm(self):
    """Without ``norm``, colours map like ``Normalize(min, max)`` of the column."""
    # added while fixing GH 45809
    values = np.random.default_rng(2).random((10, 3)) * 100
    frame = DataFrame(values, columns=["a", "b", "c"])
    ax = frame.plot.scatter(x="a", y="b", c="c")
    actual_norm = ax.collections[0].norm
    reference_norm = mpl.colors.Normalize(frame.c.min(), frame.c.max())
    for color_value in frame.c:
        assert actual_norm(color_value) == reference_norm(color_value)
@pytest.mark.slow
@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        {"legend": False},
        {"default_axes": True, "subplots": True},
        {"stacked": True},
    ],
)
def test_plot_bar(self, kwargs):
    """Smoke-test ``plot.bar`` with each parametrized keyword set."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((6, 4)),
        index=list(string.ascii_letters[:6]),
        columns=["one", "two", "three", "four"],
    )
    _check_plot_works(frame.plot.bar, **kwargs)
@pytest.mark.slow
def test_plot_bar_int_col(self):
    """Smoke-test ``plot.bar`` on a frame with fifteen integer-named columns."""
    rng = np.random.default_rng(2)
    frame = DataFrame(
        rng.standard_normal((10, 15)),
        index=list(string.ascii_letters[:10]),
        columns=range(15),
    )
    _check_plot_works(frame.plot.bar)
@pytest.mark.slow
def test_plot_bar_ticks(self):
    """Bar x-ticks are rotated 90 by default; ``rot``/``fontsize`` override."""
    frame = DataFrame({"a": [0, 1], "b": [1, 0]})

    default_ax = _check_plot_works(frame.plot.bar)
    _check_ticks_props(default_ax, xrot=90)

    custom_ax = frame.plot.bar(rot=35, fontsize=10)
    _check_ticks_props(custom_ax, xrot=35, xlabelsize=10, ylabelsize=10)
@pytest.mark.slow
def test_plot_barh_ticks(self):
    """Barh y-ticks are unrotated by default; ``rot``/``fontsize`` override."""
    frame = DataFrame({"a": [0, 1], "b": [1, 0]})

    default_ax = _check_plot_works(frame.plot.barh)
    _check_ticks_props(default_ax, yrot=0)

    custom_ax = frame.plot.barh(rot=55, fontsize=11)
    _check_ticks_props(custom_ax, yrot=55, ylabelsize=11, xlabelsize=11)
def test_boxplot(self, hist_df):
    """Box plot labels, tick positions and line count follow the numeric columns."""
    numeric_cols = hist_df._get_numeric_data().columns
    expected_labels = [pprint_thing(col) for col in numeric_cols]
    n_numeric = len(numeric_cols)

    ax = _check_plot_works(hist_df.plot.box)
    _check_text_labels(ax.get_xticklabels(), expected_labels)
    tm.assert_numpy_array_equal(
        ax.xaxis.get_ticklocs(), np.arange(1, n_numeric + 1)
    )
    # Seven line artists are expected per box.
    assert len(ax.lines) == 7 * n_numeric
def test_boxplot_series(self, hist_df):
    """A Series box plot honours ``rot`` and works with default arguments."""
    height = hist_df["height"]
    rotated_ax = height.plot.box(rot=40)
    _check_ticks_props(rotated_ax, xrot=40, yrot=0)
    _check_plot_works(height.plot.box)
def test_boxplot_series_positions(self, hist_df):
    """Box plot with explicit ``positions``: ticks must sit at those positions."""
    positions = np.array([1, 6, 7])
    ax = hist_df.plot.box(positions=positions)

    numeric = hist_df._get_numeric_data().columns
    expected_labels = [pprint_thing(col) for col in numeric]

    _check_text_labels(ax.get_xticklabels(), expected_labels)
    tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions)
    assert len(ax.lines) == 7 * len(numeric)
def test_boxplot_vertical(self, hist_df):
    """Box plot with ``vert=False``: labels and rotation are checked on the y axis."""
    numeric = hist_df._get_numeric_data().columns
    expected_labels = [pprint_thing(col) for col in numeric]

    # With vert=False the ``rot`` value is expected on the y tick labels.
    ax = hist_df.plot.box(rot=50, fontsize=8, vert=False)

    _check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8)
    _check_text_labels(ax.get_yticklabels(), expected_labels)
    assert len(ax.lines) == 7 * len(numeric)
@pytest.mark.filterwarnings("ignore:Attempt:UserWarning")
def test_boxplot_vertical_subplots(self, hist_df):
    """Box plot with ``vert=False``, ``subplots=True`` and a log x axis."""
    numeric = hist_df._get_numeric_data().columns
    expected_labels = [pprint_thing(col) for col in numeric]

    plot_kwargs = {
        "default_axes": True,
        "subplots": True,
        "vert": False,
        "logx": True,
    }
    axes = _check_plot_works(hist_df.plot.box, **plot_kwargs)

    _check_axes_shape(axes, axes_num=3, layout=(1, 3))
    _check_ax_scales(axes, xaxis="log")
    # Each subplot holds a single box: one y label and seven line artists.
    for sub_ax, expected in zip(axes, expected_labels):
        _check_text_labels(sub_ax.get_yticklabels(), [expected])
        assert len(sub_ax.lines) == 7
def test_boxplot_vertical_positions(self, hist_df):
    """Box plot with ``vert=False`` and explicit ``positions`` on the y axis."""
    numeric = hist_df._get_numeric_data().columns
    expected_labels = [pprint_thing(col) for col in numeric]
    positions = np.array([3, 2, 8])

    ax = hist_df.plot.box(positions=positions, vert=False)

    _check_text_labels(ax.get_yticklabels(), expected_labels)
    tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions)
    assert len(ax.lines) == 7 * len(numeric)
def test_boxplot_return_type_invalid(self):
    """An unrecognised ``return_type`` must raise ``ValueError``."""
    values = np.random.default_rng(2).standard_normal((6, 4))
    frame = DataFrame(
        values,
        index=list(string.ascii_letters[:6]),
        columns=["one", "two", "three", "four"],
    )
    expected_msg = "return_type must be {None, 'axes', 'dict', 'both'}"
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot.box(return_type="not_a_type")
@pytest.mark.parametrize("return_type", ["dict", "axes", "both"])
def test_boxplot_return_type_invalid_type(self, return_type):
    """Each accepted ``return_type`` is validated by ``_check_box_return_type``."""
    values = np.random.default_rng(2).standard_normal((6, 4))
    frame = DataFrame(
        values,
        index=list(string.ascii_letters[:6]),
        columns=["one", "two", "three", "four"],
    )
    returned = frame.plot.box(return_type=return_type)
    _check_box_return_type(returned, return_type)
def test_kde_df(self):
    """KDE plot of a frame: legend labels follow the columns, x rotation is 0."""
    pytest.importorskip("scipy")
    frame = DataFrame(np.random.default_rng(2).standard_normal((100, 4)))
    ax = _check_plot_works(frame.plot, kind="kde")
    legend_labels = [pprint_thing(col) for col in frame.columns]
    _check_legend_labels(ax, labels=legend_labels)
    _check_ticks_props(ax, xrot=0)
def test_kde_df_rot(self):
    """KDE plot honours explicit ``rot`` and ``fontsize`` arguments."""
    pytest.importorskip("scipy")
    samples = np.random.default_rng(2).standard_normal((10, 4))
    ax = DataFrame(samples).plot(kind="kde", rot=20, fontsize=5)
    _check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5)
def test_kde_df_subplots(self):
    """KDE plot with ``subplots=True`` yields a 4x1 grid for four columns."""
    pytest.importorskip("scipy")
    frame = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
    plot_kwargs = {"default_axes": True, "kind": "kde", "subplots": True}
    axes = _check_plot_works(frame.plot, **plot_kwargs)
    _check_axes_shape(axes, axes_num=4, layout=(4, 1))
def test_kde_df_logy(self):
    """KDE subplots with ``logy=True`` use a log-scaled y axis."""
    pytest.importorskip("scipy")
    samples = np.random.default_rng(2).standard_normal((10, 4))
    axes = DataFrame(samples).plot(kind="kde", logy=True, subplots=True)
    _check_ax_scales(axes, yaxis="log")
def test_kde_missing_vals(self):
    """KDE plotting works when the frame contains a NaN."""
    pytest.importorskip("scipy")
    frame = DataFrame(np.random.default_rng(2).uniform(size=(100, 4)))
    # Inject a single missing value at row label 0, column label 0.
    frame.loc[0, 0] = np.nan
    _check_plot_works(frame.plot, kind="kde")
def test_hist_df(self):
    """Histogram of a frame: legend labels, then log-y subplots layout."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((100, 4)))

    # Single axes: legend entries are expected to match the column labels.
    ax = _check_plot_works(frame.plot.hist)
    legend_labels = [pprint_thing(col) for col in frame.columns]
    _check_legend_labels(ax, labels=legend_labels)

    # One subplot per column, each with a log-scaled y axis.
    subplot_kwargs = {"default_axes": True, "subplots": True, "logy": True}
    axes = _check_plot_works(frame.plot.hist, **subplot_kwargs)
    _check_axes_shape(axes, axes_num=4, layout=(4, 1))
    _check_ax_scales(axes, yaxis="log")
def test_hist_df_series(self):
    """Series histogram honours ``rot`` on the x axis and leaves y at 0."""
    values = np.random.default_rng(2).random(10)
    ax = Series(values).plot.hist(rot=40)
    _check_ticks_props(ax, xrot=40, yrot=0)
def test_hist_df_series_cumulative_density(self):
    """Cumulative, density-normalised histogram must end at a height of 1.0.

    The previous implementation took the last ``Rectangle`` from
    ``ax.get_children()``.  That list ends with the axes background patch,
    itself a ``Rectangle`` of height 1.0, so the assertion passed without
    looking at any histogram bar.  The bars are read from ``ax.patches``
    instead, so the check targets the last bin.
    """
    series = Series(np.random.default_rng(2).random(10))
    ax = series.plot.hist(cumulative=True, bins=4, density=True)
    # ax.patches holds only the histogram bars; the last one is the last bin.
    last_bar = ax.patches[-1]
    tm.assert_almost_equal(last_bar.get_height(), 1.0)
def test_hist_df_series_cumulative(self):
    """Cumulative histogram of ten values must end at a height of 10.0.

    The previous implementation indexed ``[-2]`` into the ``Rectangle``
    children of the axes, relying on the axes background patch being the
    final child.  Reading the last entry of ``ax.patches`` selects the last
    bin directly and does not depend on that ordering.
    """
    series = Series(np.random.default_rng(2).random(10))
    ax = series.plot.hist(cumulative=True, bins=4)
    # ax.patches holds only the histogram bars; the last one is the last bin.
    last_bar = ax.patches[-1]
    tm.assert_almost_equal(last_bar.get_height(), 10.0)
def test_hist_df_orientation(self):
    """Horizontal histogram: ``rot`` is expected on the y tick labels."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
    hist_kwargs = {"rot": 50, "fontsize": 8, "orientation": "horizontal"}
    ax = frame.plot.hist(**hist_kwargs)
    _check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8)
@pytest.mark.parametrize(
    "weights", [0.1 * np.ones(shape=(100,)), 0.1 * np.ones(shape=(100, 2))]
)
def test_hist_weights(self, weights):
    """Uniform 0.1 weights must scale every histogram bar height by 0.1."""
    # GH 33173
    samples = np.random.default_rng(2).standard_normal((2, 100))
    frame = DataFrame(dict(zip(["A", "B"], samples)))

    weighted_ax = _check_plot_works(frame.plot, kind="hist", weights=weights)
    plain_ax = _check_plot_works(frame.plot, kind="hist")

    weighted_heights = [bar.get_height() for bar in weighted_ax.patches]
    # Scale the unweighted heights by the same constant weight by hand; the
    # two lists should then agree up to floating point error.
    scaled_plain_heights = [0.1 * bar.get_height() for bar in plain_ax.patches]
    tm.assert_almost_equal(weighted_heights, scaled_plain_heights)
def _check_box_coord(
    self,
    patches,
    expected_y=None,
    expected_h=None,
    expected_x=None,
    expected_w=None,
):
    """Compare patch coordinates against the expected arrays that were given.

    Parameters
    ----------
    patches : iterable
        Objects exposing ``get_y``, ``get_height``, ``get_x`` and
        ``get_width``.
    expected_y, expected_h, expected_x, expected_w : array or None
        Expected y positions, heights, x positions and widths.  A value of
        ``None`` skips the corresponding comparison.
    """
    # All four coordinate arrays are always collected, in y/h/x/w order,
    # regardless of which expectations were supplied.
    measured = {
        "y": np.array([p.get_y() for p in patches]),
        "h": np.array([p.get_height() for p in patches]),
        "x": np.array([p.get_x() for p in patches]),
        "w": np.array([p.get_width() for p in patches]),
    }
    wanted = {
        "y": expected_y,
        "h": expected_h,
        "x": expected_x,
        "w": expected_w,
    }
    for key, target in wanted.items():
        if target is None:
            continue
        # The result dtype follows the patch values, so it is not compared.
        tm.assert_numpy_array_equal(measured[key], target, check_dtype=False)
@pytest.mark.parametrize(
    "data",
    [
        {
            "A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])),
            "B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])),
            "C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])),
        },
        {
            "A": np.repeat(
                np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6])
            ),
            "B": np.repeat(
                np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8])
            ),
            "C": np.repeat(
                np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10])
            ),
        },
    ],
)
def test_hist_df_coord(self, data):
    """Check bar origins and extents of frame histograms.

    For the vertical orientation (no ``orientation`` argument) and then
    the horizontal one, three plots are drawn with ``bins=5``: unstacked,
    stacked, and stacked with ``subplots=True``.  Bar start coordinates
    and lengths are compared per column via ``_check_box_coord`` (y/height
    for vertical bars, x/width for horizontal ones).
    """
    df = DataFrame(data)

    zeros = np.array([0, 0, 0, 0, 0])
    # Expected bar lengths for columns A, B and C respectively.
    counts = [
        np.array([10, 9, 8, 7, 6]),
        np.array([8, 8, 8, 8, 8]),
        np.array([6, 7, 8, 9, 10]),
    ]
    # When stacked, each column starts where the previous columns end.
    stacked_starts = [
        zeros,
        np.array([10, 9, 8, 7, 6]),
        np.array([18, 17, 16, 15, 14]),
    ]
    # On a shared axes the patches come five per column, in column order.
    column_chunks = [slice(None, 5), slice(5, 10), slice(10, None)]

    layouts = [
        ({}, "expected_y", "expected_h"),
        ({"orientation": "horizontal"}, "expected_x", "expected_w"),
    ]
    for extra_kwargs, start_kw, length_kw in layouts:
        # Unstacked: every bar starts at zero.
        ax = df.plot.hist(bins=5, **extra_kwargs)
        for chunk, length in zip(column_chunks, counts):
            self._check_box_coord(
                ax.patches[chunk], **{start_kw: zeros, length_kw: length}
            )

        # Stacked on one axes: starts accumulate across columns.
        ax = df.plot.hist(bins=5, stacked=True, **extra_kwargs)
        for chunk, start, length in zip(column_chunks, stacked_starts, counts):
            self._check_box_coord(
                ax.patches[chunk], **{start_kw: start, length_kw: length}
            )

        # Stacked with subplots: each column has its own axes, starting at 0.
        axes = df.plot.hist(bins=5, stacked=True, subplots=True, **extra_kwargs)
        for column_pos, length in enumerate(counts):
            self._check_box_coord(
                axes[column_pos].patches, **{start_kw: zeros, length_kw: length}
            )
def test_plot_int_columns(self):
    """Plot a cumulative-sum frame with default integer columns and a legend."""
    raw = DataFrame(np.random.default_rng(2).standard_normal((100, 4)))
    walk = raw.cumsum()
    _check_plot_works(walk.plot, legend=True)
@pytest.mark.parametrize(
    "markers",
    [
        {0: "^", 1: "+", 2: "o"},
        {0: "^", 1: "+"},
        ["^", "+", "o"],
        ["^", "+"],
    ],
)
def test_style_by_column(self, markers):
    """Per-column ``style`` markers (dict or list) end up on the drawn lines."""
    import matplotlib.pyplot as plt

    # Start from a cleared current figure containing a single subplot.
    figure = plt.gcf()
    figure.clf()
    figure.add_subplot(111)

    frame = DataFrame(np.random.default_rng(2).standard_normal((10, 3)))
    ax = frame.plot(style=markers)

    # Only as many lines as markers were supplied are checked; ``markers``
    # is indexed by position, which works for both the dict and list forms.
    styled_lines = ax.get_lines()[: len(markers)]
    for position, drawn in enumerate(styled_lines):
        assert drawn.get_marker() == markers[position]
def test_line_label_none(self):
    """An unnamed Series has no legend by default and an empty label with one."""
    unnamed = Series([1, 2])

    ax = unnamed.plot()
    assert ax.get_legend() is None

    ax = unnamed.plot(legend=True)
    first_entry = ax.get_legend().get_texts()[0]
    assert first_entry.get_text() == ""
@pytest.mark.parametrize(
    "props, expected",
    [
        ("boxprops", "boxes"),
        ("whiskerprops", "whiskers"),
        ("capprops", "caps"),
        ("medianprops", "medians"),
    ],
)
def test_specified_props_kwd_plot_box(self, props, expected):
    """A colour passed via a ``*props`` keyword reaches the matching artists."""
    # GH 30346
    frame = DataFrame(
        {name: np.random.default_rng(2).random(100) for name in "ABC"}
    )
    artists = frame.plot.box(return_type="dict", **{props: {"color": "C1"}})
    first_artist = artists[expected][0]
    assert first_artist.get_color() == "C1"
def test_unordered_ts(self):
    """Plotting a date index in descending order keeps data order; ticks sort."""
    # GH#2609, GH#55906
    dates = [date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)]
    values = [3.0, 2.0, 1.0]
    frame = DataFrame(np.array(values), index=dates, columns=["test"])

    ax = frame.plot()

    # The line data must be passed through in the original (unsorted) order.
    xdata = ax.lines[0].get_xdata()
    tm.assert_numpy_array_equal(xdata, np.array(dates, dtype=object))
    ydata = ax.lines[0].get_ydata()
    tm.assert_numpy_array_equal(ydata, np.array(values))

    # The data is not sorted before plotting, yet the tick labels are
    # expected to come out in increasing order, by position and by date.
    tick_labels = ax.xaxis.get_ticklabels()
    tick_positions = [label.get_position()[0] for label in tick_labels]
    assert Index(tick_positions).is_monotonic_increasing
    tick_texts = [label.get_text() for label in tick_labels]
    assert pd.to_datetime(tick_texts, format="%Y-%m-%d").is_monotonic_increasing
@pytest.mark.parametrize("kind", plotting.PlotAccessor._common_kinds)
def test_kind_both_ways(self, kind):
    """Each common kind works via ``plot(kind=...)`` and via ``plot.<kind>()``."""
    pytest.importorskip("scipy")
    frame = DataFrame({"x": [1, 2, 3]})
    frame.plot(kind=kind)
    accessor_method = getattr(frame.plot, kind)
    accessor_method()
@pytest.mark.parametrize("kind", ["scatter", "hexbin"])
def test_kind_both_ways_x_y(self, kind):
    """Kinds taking x and y work via ``plot(..., kind=...)`` and ``plot.<kind>``."""
    pytest.importorskip("scipy")
    frame = DataFrame({"x": [1, 2, 3]})
    frame.plot("x", "x", kind=kind)
    accessor_method = getattr(frame.plot, kind)
    accessor_method("x", "x")
@pytest.mark.parametrize("kind", plotting.PlotAccessor._common_kinds)
def test_all_invalid_plot_data(self, kind):
    """A frame holding only strings raises ``TypeError`` for every common kind."""
    text_only = DataFrame(list("abcd"))
    with pytest.raises(TypeError, match="no numeric data to plot"):
        text_only.plot(kind=kind)
@pytest.mark.parametrize(
    "kind", list(plotting.PlotAccessor._common_kinds) + ["area"]
)
def test_partially_invalid_plot_data_numeric(self, kind):
    """An object-dtype frame with some rows set to strings raises ``TypeError``."""
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((10, 2)),
        dtype=object,
    )
    # Overwrite the rows selected by a random boolean mask with a string.
    row_mask = np.random.default_rng(2).random(frame.shape[0]) > 0.5
    frame[row_mask] = "a"
    with pytest.raises(TypeError, match="no numeric data to plot"):
        frame.plot(kind=kind)
def test_invalid_kind(self):
    """An unknown ``kind`` raises ``ValueError`` naming the offending value."""
    frame = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
    expected_msg = "invalid_plot_kind is not a valid plot kind"
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot(kind="invalid_plot_kind")
@pytest.mark.parametrize(
    "x,y,lbl",
    [
        (["B", "C"], "A", "a"),
        (["A"], ["B", "C"], ["b", "c"]),
    ],
)
def test_invalid_xy_args(self, x, y, lbl):
    """A list-like ``x`` is rejected with ``ValueError``."""
    # GH 18671, 19699 allows y to be list-like but not x
    frame = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
    expected_msg = "x must be a label or position"
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot(x=x, y=y, label=lbl)
def test_bad_label(self):
    """A scalar ``label`` with a two-column ``y`` raises ``ValueError``."""
    frame = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
    expected_msg = "label should be list-like and same length as y"
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot(x="A", y=["B", "C"], label="bad_label")
@pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")])
def test_invalid_xy_args_dup_cols(self, x, y):
    """With duplicated column label ``A``, using it as ``x`` raises ``ValueError``."""
    # GH 18671, 19699 allows y to be list-like but not x
    rows = [[1, 3, 5], [2, 4, 6]]
    frame = DataFrame(rows, columns=list("AAB"))
    expected_msg = "x must be a label or position"
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot(x=x, y=y)
@pytest.mark.parametrize(
    "x,y,lbl,colors",
    [
        ("A", ["B"], ["b"], ["red"]),
        ("A", ["B", "C"], ["b", "c"], ["red", "blue"]),
        (0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]),
    ],
)
def test_y_listlike(self, x, y, lbl, colors):
    """A list-like ``y`` draws one line per entry, with the requested colours."""
    # GH 19699: tests list-like y and verifies lbls & colors
    frame = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
    _check_plot_works(frame.plot, x="A", y=y, label=lbl)

    ax = frame.plot(x=x, y=y, label=lbl, color=colors)
    n_lines = len(ax.lines)
    assert n_lines == len(y)
    _check_colors(ax.get_lines(), linecolors=colors)
@pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])])
def test_xy_args_integer(self, x, y, colnames):
    """Integer ``x``/``y`` arguments work with string and integer column names."""
    # GH 20056: tests integer args for xy and checks col names
    frame = DataFrame({"A": [1, 2], "B": [3, 4]})
    # Relabel the columns in place with the parametrized names.
    frame.columns = colnames
    _check_plot_works(frame.plot, x=x, y=y)
def test_hexbin_basic(self):
    """A basic hexbin plot adds exactly one collection to the axes."""
    # Each column draws from its own freshly seeded generator.
    columns = {
        "A": np.random.default_rng(2).uniform(size=20),
        "B": np.random.default_rng(2).uniform(size=20),
        "C": np.arange(20) + np.random.default_rng(2).uniform(size=20),
    }
    frame = DataFrame(columns)
    ax = frame.plot.hexbin(x="A", y="B", gridsize=10)
    # TODO: need better way to test. This just does existence.
    n_collections = len(ax.collections)
    assert n_collections == 1
def test_hexbin_basic_subplots(self):
    """Hexbin with ``subplots=True``: a 1x1 result on a two-axes figure."""
    # Each column draws from its own freshly seeded generator.
    columns = {
        "A": np.random.default_rng(2).uniform(size=20),
        "B": np.random.default_rng(2).uniform(size=20),
        "C": np.arange(20) + np.random.default_rng(2).uniform(size=20),
    }
    frame = DataFrame(columns)
    # GH 6951
    axes = frame.plot.hexbin(x="A", y="B", subplots=True)
    # The figure is expected to hold two axes: the plot and its colorbar.
    figure_axes = axes[0].figure.axes
    assert len(figure_axes) == 2
    # The returned value itself describes a single axes.
    _check_axes_shape(axes, axes_num=1, layout=(1, 1))
@pytest.mark.parametrize("reduce_C", [None, np.std])
def test_hexbin_with_c(self, reduce_C):
    """Hexbin with a ``C`` column and optional reducer adds one collection."""
    # Each column draws from its own freshly seeded generator.
    columns = {
        "A": np.random.default_rng(2).uniform(size=20),
        "B": np.random.default_rng(2).uniform(size=20),
        "C": np.arange(20) + np.random.default_rng(2).uniform(size=20),
    }
    frame = DataFrame(columns)
    ax = frame.plot.hexbin(x="A", y="B", C="C", reduce_C_function=reduce_C)
    n_collections = len(ax.collections)
    assert n_collections == 1
@pytest.mark.parametrize(
    "kwargs, expected",
    [
        ({}, "BuGn"),  # default cmap
        ({"colormap": "cubehelix"}, "cubehelix"),
        ({"cmap": "YlGn"}, "YlGn"),
    ],
)
def test_hexbin_cmap(self, kwargs, expected):
    """The hexbin collection uses the requested (or default) colormap name."""
    # Each column draws from its own freshly seeded generator.
    columns = {
        "A": np.random.default_rng(2).uniform(size=20),
        "B": np.random.default_rng(2).uniform(size=20),
        "C": np.arange(20) + np.random.default_rng(2).uniform(size=20),
    }
    frame = DataFrame(columns)
    ax = frame.plot.hexbin(x="A", y="B", **kwargs)
    hex_collection = ax.collections[0]
    assert hex_collection.cmap.name == expected
def test_pie_df_err(self):
    """A frame pie plot without ``y`` or ``subplots=True`` raises ``ValueError``."""
    values = np.random.default_rng(2).random((5, 3))
    frame = DataFrame(
        values,
        columns=["X", "Y", "Z"],
        index=["a", "b", "c", "d", "e"],
    )
    expected_msg = "pie requires either y column or 'subplots=True'"
    with pytest.raises(ValueError, match=expected_msg):
        frame.plot.pie()
@pytest.mark.parametrize("y", ["Y", 2])
def test_pie_df(self, y):
    """Pie plot of one column (by label or position) is labelled by the index."""
    values = np.random.default_rng(2).random((5, 3))
    frame = DataFrame(
        values,
        columns=["X", "Y", "Z"],
        index=["a", "b", "c", "d", "e"],
    )
    ax = _check_plot_works(frame.plot.pie, y=y)
    _check_text_labels(ax.texts, frame.index)
def test_pie_df_subplots(self):
    """Pie subplots: one axes per column, index labels, column name as ylabel."""
    values = np.random.default_rng(2).random((5, 3))
    frame = DataFrame(
        values,
        columns=["X", "Y", "Z"],
        index=["a", "b", "c", "d", "e"],
    )
    axes = _check_plot_works(frame.plot.pie, default_axes=True, subplots=True)

    assert len(axes) == len(frame.columns)
    # Every pie is labelled with the frame's index values.
    for sub_ax in axes:
        _check_text_labels(sub_ax.texts, frame.index)
    # Each subplot's y label is the name of the column it shows.
    for sub_ax, column_name in zip(axes, frame.columns):
        assert sub_ax.get_ylabel() == column_name
def test_pie_df_labels_colors(self):
    """Pie subplots honour explicit ``labels`` and ``colors`` arguments."""
    values = np.random.default_rng(2).random((5, 3))
    frame = DataFrame(
        values,
        columns=["X", "Y", "Z"],
        index=["a", "b", "c", "d", "e"],
    )
    wedge_labels = ["A", "B", "C", "D", "E"]
    wedge_colors = ["r", "g", "b", "c", "m"]

    axes = _check_plot_works(
        frame.plot.pie,
        default_axes=True,
        subplots=True,
        labels=wedge_labels,
        colors=wedge_colors,
    )

    assert len(axes) == len(frame.columns)
    for sub_ax in axes:
        _check_text_labels(sub_ax.texts, wedge_labels)
        _check_colors(sub_ax.patches, facecolors=wedge_colors)
def test_pie_df_nan(self):
    """Pie subplots with a NaN per column: blank wedge label, no legend entry."""
    frame = DataFrame(np.random.default_rng(2).random((4, 4)))
    # Put a NaN on the diagonal so that column i is missing row i.
    for diag in range(4):
        frame.iloc[diag, diag] = np.nan

    _, axes = mpl.pyplot.subplots(ncols=4)

    # GH 37668
    with tm.assert_produces_warning(None):
        frame.plot.pie(subplots=True, ax=axes, legend=True, normalize=True)

    all_labels = ["0", "1", "2", "3"]
    for pos, ax in enumerate(axes):
        # The wedge text for the NaN row is expected to be an empty string.
        blanked = [*all_labels[:pos], "", *all_labels[pos + 1 :]]
        wedge_texts = [text.get_text() for text in ax.texts]
        assert wedge_texts == blanked

        # legend labels
        # NaN's not included in legend with subplots
        # see https://github.com/pandas-dev/pandas/issues/8390
        legend_texts = [text.get_text() for text in ax.get_legend().get_texts()]
        without_nan = all_labels[:pos] + all_labels[pos + 1 :]
        assert legend_texts == without_nan
@pytest.mark.slow
@pytest.mark.parametrize(
    "kwargs",
    [
        {"logy": True},
        {"logx": True, "logy": True},
        {"loglog": True},
    ],
)
def test_errorbar_plot(self, kwargs):
    """Line plot with a DataFrame ``yerr`` under several log-scale options."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    errors = DataFrame({"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4})
    # check line plots
    ax = _check_plot_works(frame.plot, yerr=errors, **kwargs)
    _check_has_errorbars(ax, xerr=0, yerr=2)
@pytest.mark.slow
def test_errorbar_plot_bar(self):
    """Log-scaled bar plot with both ``xerr`` and ``yerr`` given as frames."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    errors = DataFrame({"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4})
    # The data is shifted by one before plotting with ``log=True``.
    shifted = frame + 1
    ax = _check_plot_works(
        shifted.plot, yerr=errors, xerr=errors, kind="bar", log=True
    )
    _check_has_errorbars(ax, xerr=2, yerr=2)
@pytest.mark.slow
def test_errorbar_plot_yerr_array(self):
    """``yerr`` supplied as raw arrays, for a Series and for a frame."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})

    # yerr is raw error values: 1-D for the single column ...
    series_ax = _check_plot_works(frame["y"].plot, yerr=np.ones(12) * 0.4)
    _check_has_errorbars(series_ax, xerr=0, yerr=1)

    # ... and a (2, 12) array for the two-column frame.
    frame_ax = _check_plot_works(frame.plot, yerr=np.ones((2, 12)) * 0.4)
    _check_has_errorbars(frame_ax, xerr=0, yerr=2)
@pytest.mark.slow
@pytest.mark.parametrize("yerr", ["yerr", "誤差"])
def test_errorbar_plot_column_name(self, yerr):
    """``yerr`` given as the name of a column in the plotted frame."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    # Store the error values in the frame under the parametrized name.
    frame[yerr] = np.ones(12) * 0.2

    all_columns_ax = _check_plot_works(frame.plot, yerr=yerr)
    _check_has_errorbars(all_columns_ax, xerr=0, yerr=2)

    single_column_ax = _check_plot_works(frame.plot, y="y", x="x", yerr=yerr)
    _check_has_errorbars(single_column_ax, xerr=0, yerr=1)
@pytest.mark.slow
def test_errorbar_plot_external_valueerror(self):
    """A ``yerr`` array of length 11 for 12 rows raises an external ``ValueError``."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    too_short = np.random.default_rng(2).standard_normal(11)
    with tm.external_error_raised(ValueError):
        frame.plot(yerr=too_short)
@pytest.mark.slow
def test_errorbar_plot_external_typeerror(self):
    """String-valued error frames raise an external ``TypeError``."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    text_errors = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12})
    with tm.external_error_raised(TypeError):
        frame.plot(yerr=text_errors)
@pytest.mark.slow
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
@pytest.mark.parametrize(
    "y_err",
    [
        Series(np.ones(12) * 0.2, name="x"),
        DataFrame({"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}),
    ],
)
def test_errorbar_plot_different_yerr(self, kind, y_err):
    """``yerr`` as a Series or a DataFrame, for line, bar and barh plots."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    plot_kwargs = {"yerr": y_err, "kind": kind}
    ax = _check_plot_works(frame.plot, **plot_kwargs)
    _check_has_errorbars(ax, xerr=0, yerr=2)
@pytest.mark.slow
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
@pytest.mark.parametrize(
    "y_err, x_err",
    [
        (
            DataFrame({"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}),
            DataFrame({"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}),
        ),
        (Series(np.ones(12) * 0.2, name="x"), Series(np.ones(12) * 0.2, name="x")),
        (0.2, 0.2),
    ],
)
def test_errorbar_plot_different_yerr_xerr(self, kind, y_err, x_err):
    """Both ``yerr`` and ``xerr`` given as frames, Series or scalars."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    plot_kwargs = {"yerr": y_err, "xerr": x_err, "kind": kind}
    ax = _check_plot_works(frame.plot, **plot_kwargs)
    _check_has_errorbars(ax, xerr=2, yerr=2)
@pytest.mark.slow
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
def test_errorbar_plot_different_yerr_xerr_subplots(self, kind):
    """With ``subplots=True`` the per-axes expectation is xerr=1 and yerr=1."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    errors = DataFrame({"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4})
    plot_kwargs = {
        "default_axes": True,
        "yerr": errors,
        "xerr": errors,
        "subplots": True,
        "kind": kind,
    }
    axes = _check_plot_works(frame.plot, **plot_kwargs)
    _check_has_errorbars(axes, xerr=1, yerr=1)
@pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError)
def test_errorbar_plot_iterator(self):
    """``yerr`` supplied as an iterator; currently expected to fail (xfail)."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    # yerr is iterator
    repeated_error = itertools.repeat(0.1, len(frame))
    ax = _check_plot_works(frame.plot, yerr=repeated_error)
    _check_has_errorbars(ax, xerr=0, yerr=2)
def test_errorbar_with_integer_column_names(self):
    """Error bars work when the frame's column labels are integers."""
    # test with integer column names
    frame = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 2))))
    errors = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 2))))

    frame_err_ax = _check_plot_works(frame.plot, yerr=errors)
    _check_has_errorbars(frame_err_ax, xerr=0, yerr=2)

    # Here ``yerr=1`` is an integer column label, not a scalar error size.
    label_err_ax = _check_plot_works(frame.plot, y=0, yerr=1)
    _check_has_errorbars(label_err_ax, xerr=0, yerr=1)
@pytest.mark.slow
@pytest.mark.parametrize("kind", ["line", "bar"])
def test_errorbar_with_partial_columns_kind(self, kind):
    """Errors given for two of three columns yield two sets of y error bars."""
    frame = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 3))))
    # The error frame only has columns 0 and 2.
    partial_values = np.abs(np.random.default_rng(2).standard_normal((10, 2)))
    partial_errors = DataFrame(partial_values, columns=[0, 2])
    ax = _check_plot_works(frame.plot, yerr=partial_errors, kind=kind)
    _check_has_errorbars(ax, xerr=0, yerr=2)
@pytest.mark.slow
def test_errorbar_with_partial_columns_dti(self):
    """Partial-column errors on a line plot with a month-end date index."""
    frame = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 3))))
    partial_values = np.abs(np.random.default_rng(2).standard_normal((10, 2)))
    partial_errors = DataFrame(partial_values, columns=[0, 2])

    # Give both frames the same DatetimeIndex, in place.
    month_ends = date_range("1/1/2000", periods=10, freq="ME")
    frame.set_index(month_ends, inplace=True)
    partial_errors.set_index(month_ends, inplace=True)

    ax = _check_plot_works(frame.plot, yerr=partial_errors, kind="line")
    _check_has_errorbars(ax, xerr=0, yerr=2)
@pytest.mark.slow
@pytest.mark.parametrize("err_box", [lambda x: x, DataFrame])
def test_errorbar_with_partial_columns_box(self, err_box):
    """Errors as a dict or DataFrame in which only key ``x`` matches a column."""
    frame = DataFrame({"x": np.arange(12), "y": np.arange(12, 0, -1)})
    # "z" is not a column of the plotted frame.
    raw_errors = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4}
    errors = err_box(raw_errors)
    ax = _check_plot_works(frame.plot, yerr=errors)
    _check_has_errorbars(ax, xerr=0, yerr=1)
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
def test_errorbar_timeseries(self, kind):
    """Error bars on frames with a date index, for several ``yerr`` forms."""
    d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
    d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}

    # check time-series plots
    ix = date_range("1/1/2000", "1/1/2001", freq="ME")
    tdf = DataFrame(d, index=ix)
    tdf_err = DataFrame(d_err, index=ix)

    # (extra plot kwargs, expected yerr count), in the order they are run:
    # error frame, error dict, error Series, error column name, and the
    # error frame once more.
    single_axes_cases = [
        ({"yerr": tdf_err}, 2),
        ({"yerr": d_err}, 2),
        ({"y": "y", "yerr": tdf_err["x"]}, 1),
        ({"y": "y", "yerr": "x"}, 1),
        ({"yerr": tdf_err}, 2),
    ]
    for plot_kwargs, expected_yerr in single_axes_cases:
        ax = _check_plot_works(tdf.plot, **plot_kwargs, kind=kind)
        _check_has_errorbars(ax, xerr=0, yerr=expected_yerr)

    # With subplots the expectation is yerr=1 for the returned axes.
    axes = _check_plot_works(
        tdf.plot,
        default_axes=True,
        kind=kind,
        yerr=tdf_err,
        subplots=True,
    )
    _check_has_errorbars(axes, xerr=0, yerr=1)
def test_errorbar_asymmetrical(self):
    """Asymmetrical errors of shape (3, 2, 5); a transposed shape is rejected."""
    err = np.random.default_rng(2).random((3, 2, 5))

    # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]...
    df = DataFrame(np.arange(15).reshape(3, 5)).T

    ax = df.plot(yerr=err, xerr=err / 2)

    # y coordinates of the first path of ``ax.collections[1]``, compared
    # with the (lower, upper) errors of the first column at the first row.
    first_path = ax.collections[1].get_paths()[0]
    observed = first_path.vertices[:, 1]
    signs = np.array([-1, 1])
    tm.assert_almost_equal(observed, err[0, :, 0] * signs)

    expected_msg = re.escape(
        "Asymmetrical error bars should be provided with the shape (3, 2, 5)"
    )
    with pytest.raises(ValueError, match=expected_msg):
        df.plot(yerr=err.T)
def test_table(self):
    """``table=True``, ``table=<frame>`` and ``plotting.table`` all work."""
    frame = DataFrame(
        np.random.default_rng(2).random((10, 3)),
        index=list(string.ascii_letters[:10]),
    )
    for table_arg in (True, frame):
        _check_plot_works(frame.plot, table=table_arg)

    # GH 35945 UserWarning
    with tm.assert_produces_warning(None):
        ax = frame.plot()
        # No table exists until one is added explicitly.
        assert len(ax.tables) == 0
        plotting.table(ax, frame.T)
        assert len(ax.tables) == 1
def test_errorbar_scatter(self):
    """Scatter plots with no, x-only, y-only and both error-bar arguments."""
    frame = DataFrame(
        np.abs(np.random.default_rng(2).standard_normal((5, 2))),
        index=range(5),
        columns=["x", "y"],
    )
    errors = DataFrame(
        np.abs(np.random.default_rng(2).standard_normal((5, 2))) / 5,
        index=range(5),
        columns=["x", "y"],
    )

    # (error kwargs, expected xerr count, expected yerr count)
    cases = [
        ({}, 0, 0),
        ({"xerr": errors}, 1, 0),
        ({"yerr": errors}, 0, 1),
        ({"xerr": errors, "yerr": errors}, 1, 1),
    ]
    for err_kwargs, n_xerr, n_yerr in cases:
        ax = _check_plot_works(frame.plot.scatter, x="x", y="y", **err_kwargs)
        _check_has_errorbars(ax, xerr=n_xerr, yerr=n_yerr)
def test_errorbar_scatter_color(self):
    """Error bars of a scatter plot take the colour given via ``c``/``color``.

    The nested helper previously accepted a ``containers`` argument that it
    never used, silently reading ``ax`` from the enclosing scope instead.
    It now receives the axes explicitly, so the object being checked is
    always the one passed at the call site.
    """

    def _check_errorbar_color(ax, expected, has_err="has_xerr"):
        # Use the lines of the first container whose ``has_err`` flag is set.
        errs = next(c.lines for c in ax.containers if getattr(c, has_err, False))
        # Entries may be single artists or list-likes of artists; flatten.
        lines = []
        for el in errs:
            if is_list_like(el):
                lines.extend(el)
            else:
                lines.append(el)
        # Only artists that are also in ``ax.collections`` are colour-checked.
        err_lines = [x for x in lines if x in ax.collections]
        _check_colors(err_lines, linecolors=np.array([expected] * len(err_lines)))

    # GH 8081
    df = DataFrame(
        np.abs(np.random.default_rng(2).standard_normal((10, 5))),
        columns=["a", "b", "c", "d", "e"],
    )

    ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red")
    _check_has_errorbars(ax, xerr=1, yerr=1)
    _check_errorbar_color(ax, "red", has_err="has_xerr")
    _check_errorbar_color(ax, "red", has_err="has_yerr")

    ax = df.plot.scatter(x="a", y="b", yerr="e", color="green")
    _check_has_errorbars(ax, xerr=0, yerr=1)
    _check_errorbar_color(ax, "green", has_err="has_yerr")
def test_scatter_unknown_colormap(self):
    """An unknown colormap name raises ``ValueError`` or ``KeyError``."""
    # GH#48726
    frame = DataFrame({"a": [1, 2, 3], "b": 4})
    accepted_errors = (ValueError, KeyError)
    with pytest.raises(accepted_errors, match="'unknown' is not a"):
        frame.plot(x="a", y="b", colormap="unknown", kind="scatter")
def test_sharex_and_ax(self):
    """``sharex=True`` with user-supplied gridspec axes hides the right labels."""
    # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
    # the axis in fig.get_axis() are sorted differently than pandas
    # expected them, so make sure that only the right ones are removed
    import matplotlib.pyplot as plt

    plt.close("all")
    gs, axes = _generate_4_axes_via_gridspec()

    # Four identical columns "a".."d" holding 1..6.
    df = DataFrame({name: [1, 2, 3, 4, 5, 6] for name in "abcd"})

    def _check(axes):
        # Every axes has one line and visible y tick labels.
        for ax in axes:
            assert len(ax.lines) == 1
            _check_visible(ax.get_yticklabels(), visible=True)
        # Axes 0 and 2 are expected to have their x tick labels hidden ...
        for pos in (0, 2):
            _check_visible(axes[pos].get_xticklabels(), visible=False)
            _check_visible(axes[pos].get_xticklabels(minor=True), visible=False)
        # ... while axes 1 and 3 keep them visible.
        for pos in (1, 3):
            _check_visible(axes[pos].get_xticklabels(), visible=True)
            _check_visible(axes[pos].get_xticklabels(minor=True), visible=True)

    # Plot onto each prepared axes individually.
    for ax in axes:
        df.plot(x="a", y="b", title="title", ax=ax, sharex=True)
    gs.tight_layout(plt.gcf())
    _check(axes)
    plt.close("all")

    # Plot all columns at once onto fresh axes; a UserWarning is expected.
    gs, axes = _generate_4_axes_via_gridspec()
    with tm.assert_produces_warning(UserWarning):
        axes = df.plot(subplots=True, ax=axes, sharex=True)
    _check(axes)
def test_sharex_false_and_ax(self):
    """Without ``sharex``, plotting onto gridspec axes keeps all labels visible."""
    # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
    # the axis in fig.get_axis() are sorted differently than pandas
    # expected them, so make sure that only the right ones are removed
    import matplotlib.pyplot as plt

    # Four identical columns "a".."d" holding 1..6.
    df = DataFrame({name: [1, 2, 3, 4, 5, 6] for name in "abcd"})
    gs, axes = _generate_4_axes_via_gridspec()

    # without sharex, no labels should be touched!
    for target_ax in axes:
        df.plot(x="a", y="b", title="title", ax=target_ax)
    gs.tight_layout(plt.gcf())

    for target_ax in axes:
        assert len(target_ax.lines) == 1
        _check_visible(target_ax.get_yticklabels(), visible=True)
        _check_visible(target_ax.get_xticklabels(), visible=True)
        _check_visible(target_ax.get_xticklabels(minor=True), visible=True)
def test_sharey_and_ax(self):
    """``sharey=True`` with user-supplied gridspec axes hides the right labels."""
    # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
    # the axis in fig.get_axis() are sorted differently than pandas
    # expected them, so make sure that only the right ones are removed
    import matplotlib.pyplot as plt

    gs, axes = _generate_4_axes_via_gridspec()

    # Four identical columns "a".."d" holding 1..6.
    df = DataFrame({name: [1, 2, 3, 4, 5, 6] for name in "abcd"})

    def _check(axes):
        # Every axes has one line and visible x tick labels.
        for ax in axes:
            assert len(ax.lines) == 1
            _check_visible(ax.get_xticklabels(), visible=True)
            _check_visible(ax.get_xticklabels(minor=True), visible=True)
        # Axes 0 and 1 keep their y tick labels ...
        for pos in (0, 1):
            _check_visible(axes[pos].get_yticklabels(), visible=True)
        # ... while axes 2 and 3 are expected to have them hidden.
        for pos in (2, 3):
            _check_visible(axes[pos].get_yticklabels(), visible=False)

    # Plot onto each prepared axes individually.
    for ax in axes:
        df.plot(x="a", y="b", title="title", ax=ax, sharey=True)
    gs.tight_layout(plt.gcf())
    _check(axes)
    plt.close("all")

    # Plot all columns at once onto fresh axes; a UserWarning is expected.
    gs, axes = _generate_4_axes_via_gridspec()
    with tm.assert_produces_warning(UserWarning):
        axes = df.plot(subplots=True, ax=axes, sharey=True)
    gs.tight_layout(plt.gcf())
    _check(axes)
def test_sharey_and_ax_tight(self):
    """Plotting onto gridspec axes with no sharing keeps all labels visible."""
    # https://github.com/pandas-dev/pandas/issues/9737 using gridspec
    import matplotlib.pyplot as plt

    # Four identical columns "a".."d" holding 1..6.
    df = DataFrame({name: [1, 2, 3, 4, 5, 6] for name in "abcd"})
    gs, axes = _generate_4_axes_via_gridspec()

    # Neither sharex nor sharey is passed, so no labels should be touched.
    for target_ax in axes:
        df.plot(x="a", y="b", title="title", ax=target_ax)
    gs.tight_layout(plt.gcf())

    for target_ax in axes:
        assert len(target_ax.lines) == 1
        _check_visible(target_ax.get_yticklabels(), visible=True)
        _check_visible(target_ax.get_xticklabels(), visible=True)
        _check_visible(target_ax.get_xticklabels(minor=True), visible=True)
@pytest.mark.parametrize("kind", plotting.PlotAccessor._all_kinds)
def test_memory_leak(self, kind):
    """Check that every plot type gets properly collected."""
    pytest.importorskip("scipy")

    plot_kwargs = {}
    if kind in ["hexbin", "scatter", "pie"]:
        # These kinds are called with explicit x and y columns.
        df = DataFrame(
            {
                "A": np.random.default_rng(2).uniform(size=20),
                "B": np.random.default_rng(2).uniform(size=20),
                "C": np.arange(20) + np.random.default_rng(2).uniform(size=20),
            }
        )
        plot_kwargs = {"x": "A", "y": "B"}
    else:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        if kind == "area":
            # Area plots are given absolute (non-negative) values.
            df = df.abs()

    # Keep only a weak reference to the plot result: binding it to a local
    # name would hold a strong reference and stop it from being collected.
    ref = weakref.ref(df.plot(kind=kind, **plot_kwargs))

    # have matplotlib delete all the figures
    plt.close("all")
    # force a garbage collection
    gc.collect()
    assert ref() is None
def test_df_gridspec_patterns_vert_horiz(self):
    """Plot onto two-cell vertical and horizontal GridSpec layouts."""
    # GH 10819
    from matplotlib import gridspec
    import matplotlib.pyplot as plt

    ts = Series(
        np.random.default_rng(2).standard_normal(10),
        index=date_range("1/1/2000", periods=10),
    )
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 2)),
        index=ts.index,
        columns=list("AB"),
    )

    def _get_vertical_grid():
        # 3x1 grid: first axes spans rows 0-1, second axes takes row 2
        spec = gridspec.GridSpec(3, 1)
        figure = plt.figure()
        upper = figure.add_subplot(spec[:2, :])
        lower = figure.add_subplot(spec[2, :])
        return upper, lower

    def _get_horizontal_grid():
        # 1x3 grid: first axes spans columns 0-1, second axes takes column 2
        spec = gridspec.GridSpec(1, 3)
        figure = plt.figure()
        left = figure.add_subplot(spec[:, :2])
        right = figure.add_subplot(spec[:, 2])
        return left, right

    def _assert_all_ticklabels_visible(ax):
        _check_visible(ax.get_yticklabels(), visible=True)
        _check_visible(ax.get_xticklabels(), visible=True)
        _check_visible(ax.get_xticklabels(minor=True), visible=True)

    # both layouts are created before the loop starts
    layouts = [_get_vertical_grid(), _get_horizontal_grid()]
    for first, second in layouts:
        series_ax = ts.plot(ax=first)
        assert len(series_ax.lines) == 1
        frame_ax = df.plot(ax=second)
        assert len(frame_ax.lines) == 2
        for ax in (series_ax, frame_ax):
            _assert_all_ticklabels_visible(ax)
        plt.close("all")

    # subplots=True
    layouts = [_get_vertical_grid(), _get_horizontal_grid()]
    for first, second in layouts:
        axes = df.plot(subplots=True, ax=[first, second])
        assert len(first.lines) == 1
        assert len(second.lines) == 1
        for ax in axes:
            _assert_all_ticklabels_visible(ax)
        plt.close("all")

    # vertical / subplots / sharex=True / sharey=True
    top, bottom = _get_vertical_grid()
    with tm.assert_produces_warning(UserWarning):
        axes = df.plot(subplots=True, ax=[top, bottom], sharex=True, sharey=True)
    for position in (0, 1):
        assert len(axes[position].lines) == 1
    for ax in (top, bottom):
        # yaxis are visible because there is only one column
        _check_visible(ax.get_yticklabels(), visible=True)
    # xaxis of axes0 (top) are hidden
    _check_visible(axes[0].get_xticklabels(), visible=False)
    _check_visible(axes[0].get_xticklabels(minor=True), visible=False)
    _check_visible(axes[1].get_xticklabels(), visible=True)
    _check_visible(axes[1].get_xticklabels(minor=True), visible=True)
    plt.close("all")

    # horizontal / subplots / sharex=True / sharey=True
    left, right = _get_horizontal_grid()
    with tm.assert_produces_warning(UserWarning):
        axes = df.plot(subplots=True, ax=[left, right], sharex=True, sharey=True)
    for position in (0, 1):
        assert len(axes[position].lines) == 1
    _check_visible(axes[0].get_yticklabels(), visible=True)
    # yaxis of axes1 (right) are hidden
    _check_visible(axes[1].get_yticklabels(), visible=False)
    for ax in (left, right):
        # xaxis are visible because there is only one column
        _check_visible(ax.get_xticklabels(), visible=True)
        _check_visible(ax.get_xticklabels(minor=True), visible=True)
    plt.close("all")
def test_df_gridspec_patterns_boxed(self):
    """Plot four columns onto a 2x2 arrangement carved out of a 3x3 GridSpec."""
    # GH 10819
    from matplotlib import gridspec
    import matplotlib.pyplot as plt

    ts = Series(
        np.random.default_rng(2).standard_normal(10),
        index=date_range("1/1/2000", periods=10),
    )

    # boxed
    def _get_boxed_grid():
        spec = gridspec.GridSpec(3, 3)
        figure = plt.figure()
        # order: top-left, top-right, bottom-left, bottom-right
        cells = (spec[:2, :2], spec[:2, 2], spec[2, :2], spec[2, 2])
        return tuple(figure.add_subplot(cell) for cell in cells)

    axes = _get_boxed_grid()
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        index=ts.index,
        columns=list("ABCD"),
    )
    axes = df.plot(subplots=True, ax=axes)
    for ax in axes:
        assert len(ax.lines) == 1
        # axis are visible because these are not shared
        for ticklabels in (
            ax.get_yticklabels(),
            ax.get_xticklabels(),
            ax.get_xticklabels(minor=True),
        ):
            _check_visible(ticklabels, visible=True)
    plt.close("all")

    # subplots / sharex=True / sharey=True
    axes = _get_boxed_grid()
    with tm.assert_produces_warning(UserWarning):
        axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True)
    for ax in axes:
        assert len(ax.lines) == 1

    for position in (0, 2):  # left column
        _check_visible(axes[position].get_yticklabels(), visible=True)
    for position in (1, 3):  # right column
        _check_visible(axes[position].get_yticklabels(), visible=False)
    for position in (0, 1):  # top row
        _check_visible(axes[position].get_xticklabels(), visible=False)
        _check_visible(axes[position].get_xticklabels(minor=True), visible=False)
    for position in (2, 3):  # bottom row
        _check_visible(axes[position].get_xticklabels(), visible=True)
        _check_visible(axes[position].get_xticklabels(minor=True), visible=True)
    plt.close("all")
def test_df_grid_settings(self):
    """Run the shared grid-settings check over every DataFrame plot kind."""
    # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792
    frame = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
    xy_kwargs = {"x": "a", "y": "b"}
    _check_grid_settings(
        frame, plotting.PlotAccessor._dataframe_kinds, kws=xy_kwargs
    )
def test_plain_axes(self):
    """Plot a Series onto a subplot axes whose figure also holds a plain axes."""
    # supplied ax itself is a SubplotAxes, but figure contains also
    # a plain Axes object (GH11556)
    figure, subplot_ax = mpl.pyplot.subplots()
    plain_rect = [0.2, 0.2, 0.2, 0.2]
    figure.add_axes(plain_rect)
    samples = np.random.default_rng(2).random(10)
    Series(samples).plot(ax=subplot_ax)
def test_plain_axes_df(self):
    """Draw a colormapped scatter plot onto an axes created via ``add_axes``."""
    # supplied ax itself is a plain Axes, but because the cmap keyword
    # a new ax is created for the colorbar -> also multiples axes (GH11520)
    # each column gets its own generator seeded with 2
    df = DataFrame(
        {name: np.random.default_rng(2).standard_normal(8) for name in ("a", "b")}
    )
    figure = mpl.pyplot.figure()
    plain_ax = figure.add_axes((0, 0, 1, 1))
    df.plot(kind="scatter", ax=plain_ax, x="a", y="b", c="a", cmap="hsv")
def test_plain_axes_make_axes_locatable(self):
    """Plot onto an axes and onto an axes appended to it via a divider."""
    # other examples
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    _, main_ax = mpl.pyplot.subplots()
    side_ax = make_axes_locatable(main_ax).append_axes(
        "right", size="5%", pad=0.05
    )
    for target in (main_ax, side_ax):
        Series(np.random.default_rng(2).random(10)).plot(ax=target)
def test_plain_axes_make_inset_axes(self):
    """Plot onto an axes and onto an inset axes placed inside it."""
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    _, main_ax = mpl.pyplot.subplots()
    inset = inset_axes(main_ax, width="30%", height=1.0, loc=3)
    for target in (main_ax, inset):
        Series(np.random.default_rng(2).random(10)).plot(ax=target)
@pytest.mark.parametrize("method", ["line", "barh", "bar"])
def test_secondary_axis_font_size(self, method):
    """Check that ``fontsize`` reaches the y tick labels of the secondary axis."""
    # GH: 12565
    base = DataFrame(
        np.random.default_rng(2).standard_normal((15, 2)), columns=list("AB")
    )
    with_cumsum = base.assign(C=lambda frame: frame.B.cumsum())
    df = with_cumsum.assign(D=lambda frame: frame.C * 1.1)

    fontsize = 20
    plot_method = getattr(df.plot, method)
    ax = plot_method(secondary_y=["C", "D"], fontsize=fontsize, mark_right=True)
    _check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize)
def test_x_string_values_ticks(self):
    """Check string x labels keep their tick positions after changing xlim."""
    # Test if string plot index have a fixed xtick position
    # GH: 7612, GH: 22334
    days = ["Monday", "Tuesday", "Wednesday"]
    df = DataFrame({"sales": [3, 2, 3], "visits": [20, 42, 28], "day": days})
    ax = df.plot.area(x="day")
    ax.set_xlim(-1, 3)

    label_texts = [label.get_text() for label in ax.get_xticklabels()]
    labels_position = {
        text: tick for text, tick in zip(label_texts, ax.get_xticks())
    }
    # Testing if the label stayed at the right position
    expected_positions = {"Monday": 0.0, "Tuesday": 1.0, "Wednesday": 2.0}
    for day, position in expected_positions.items():
        assert labels_position[day] == position
def test_x_multiindex_values_ticks(self):
    """Check MultiIndex x labels keep their tick positions after changing xlim."""
    # Test if multiindex plot index have a fixed xtick position
    # GH: 15912
    years_by_period = MultiIndex.from_product([[2012, 2013], [1, 2]])
    values = np.random.default_rng(2).standard_normal((4, 2))
    df = DataFrame(values, columns=["A", "B"], index=years_by_period)
    ax = df.plot()
    ax.set_xlim(-1, 4)

    label_texts = [label.get_text() for label in ax.get_xticklabels()]
    labels_position = {
        text: tick for text, tick in zip(label_texts, ax.get_xticks())
    }
    # Testing if the label stayed at the right position
    expected_positions = {
        "(2012, 1)": 0.0,
        "(2012, 2)": 1.0,
        "(2013, 1)": 2.0,
        "(2013, 2)": 3.0,
    }
    for label, position in expected_positions.items():
        assert labels_position[label] == position
@pytest.mark.parametrize("kind", ["line", "area"])
def test_xlim_plot_line(self, kind):
    """Check the x limits extend beyond the index values 1 and 2."""
    # test if xlim is set correctly in plot.line and plot.area
    # GH 27686
    frame = DataFrame([2, 4], index=[1, 2])
    lower, upper = frame.plot(kind=kind).get_xlim()
    assert lower < 1
    assert upper > 2
def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
    """Check xlim and tick labels when bars and a secondary line share an axes."""
    # test if xlim is set correctly when ax contains multiple different kinds
    # of plots, GH 27686
    _, ax = mpl.pyplot.subplots()

    indexes = ["k1", "k2", "k3", "k4"]
    columns = {
        "s1": [1000, 2000, 1500, 2000],
        "s2": [900, 1400, 2000, 3000],
        "s3": [1500, 1500, 1600, 1200],
        "secondary_y": [1, 3, 4, 3],
    }
    df = DataFrame(columns, index=indexes)

    bar_part = df[["s1", "s2", "s3"]]
    bar_part.plot.bar(ax=ax, stacked=False)
    line_part = df[["secondary_y"]]
    line_part.plot(ax=ax, secondary_y=True)

    lower, upper = ax.get_xlim()
    assert lower < 0
    assert upper > 3

    # make sure axis labels are plotted correctly as well
    assert [label.get_text() for label in ax.get_xticklabels()] == indexes
def test_plot_no_rows(self):
    """Check an empty single-column frame still yields one line with no data."""
    # GH 27758
    df = DataFrame(columns=["foo"], dtype=int)
    assert df.empty

    ax = df.plot()
    drawn = ax.get_lines()
    assert len(drawn) == 1
    only_line = drawn[0]
    for data in (only_line.get_xdata(), only_line.get_ydata()):
        assert len(data) == 0
def test_plot_no_numeric_data(self):
    """Check plotting a frame of strings raises ``TypeError``."""
    strings_only = DataFrame(["a", "b", "c"])
    msg = "no numeric data to plot"
    with pytest.raises(TypeError, match=msg):
        strings_only.plot()
@pytest.mark.parametrize(
    "kind", ("line", "bar", "barh", "hist", "kde", "density", "area", "pie")
)
def test_group_subplot(self, kind):
    """
    Check that grouping columns through ``subplots`` yields one axes per group.

    Columns ``b``/``e`` and ``c``/``d`` are requested as groups; three axes
    are expected back, the third holding the ungrouped column ``a``.
    """
    if kind in ("kde", "density"):
        # Only the kernel-density kinds need scipy. Skipping unconditionally
        # would drop every other kind whenever scipy is not installed.
        pytest.importorskip("scipy")

    d = {
        "a": np.arange(10),
        "b": np.arange(10) + 1,
        "c": np.arange(10) + 1,
        "d": np.arange(10),
        "e": np.arange(10),
    }
    df = DataFrame(d)

    axes = df.plot(subplots=[("b", "e"), ("c", "d")], kind=kind)
    assert len(axes) == 3  # 2 groups + single column a

    expected_labels = (["b", "e"], ["c", "d"], ["a"])
    for ax, labels in zip(axes, expected_labels):
        # the legend check is skipped for pie
        if kind != "pie":
            _check_legend_labels(ax, labels=labels)
        if kind == "line":
            assert len(ax.lines) == len(labels)
def test_group_subplot_series_notimplemented(self):
    """Check an iterable ``subplots`` on a Series raises ``NotImplementedError``."""
    one_value = Series(range(1))
    groups = [("a",)]
    with pytest.raises(
        NotImplementedError, match="An iterable subplots for a Series"
    ):
        one_value.plot(subplots=groups)
def test_group_subplot_multiindex_notimplemented(self):
    """Check an iterable ``subplots`` with MultiIndex columns is rejected."""
    columns = MultiIndex.from_tuples([(0, 1), (1, 2)])
    frame = DataFrame(np.eye(2), columns=columns)
    with pytest.raises(
        NotImplementedError,
        match="An iterable subplots for a DataFrame with a MultiIndex",
    ):
        frame.plot(subplots=[(0, 1)])
def test_group_subplot_nonunique_cols_notimplemented(self):
    """Check an iterable ``subplots`` with duplicate column labels is rejected."""
    frame = DataFrame(np.eye(2), columns=["a", "a"])
    with pytest.raises(
        NotImplementedError,
        match="An iterable subplots for a DataFrame with non-unique",
    ):
        frame.plot(subplots=[("a",)])
@pytest.mark.parametrize(
    "subplots, expected_msg",
    [
        (123, "subplots should be a bool or an iterable"),
        ("a", "each entry should be a list/tuple"),  # iterable of non-iterable
        ((1,), "each entry should be a list/tuple"),  # iterable of non-iterable
        (("a",), "each entry should be a list/tuple"),  # iterable of strings
    ],
)
def test_group_subplot_bad_input(self, subplots, expected_msg):
    """Check a malformed ``subplots`` argument raises ``ValueError``."""
    # Make sure error is raised when subplots is not a properly
    # formatted iterable. Only iterables of iterables are permitted, and
    # entries should not be strings.
    frame = DataFrame({"a": np.arange(10), "b": np.arange(10)})

    with pytest.raises(ValueError, match=expected_msg):
        frame.plot(subplots=subplots)
def test_group_subplot_invalid_column_name(self):
    """Check a group naming a missing column raises ``ValueError``."""
    frame = DataFrame({"a": np.arange(10), "b": np.arange(10)})
    msg = r"Column label\(s\) \['bad_name'\]"
    with pytest.raises(ValueError, match=msg):
        frame.plot(subplots=[("a", "bad_name")])
def test_group_subplot_duplicated_column(self):
    """Check a column listed in two groups raises ``ValueError``."""
    frame = DataFrame({name: np.arange(10) for name in ("a", "b", "c")})
    overlapping_groups = [("a", "b"), ("a", "c")]
    with pytest.raises(ValueError, match="should be in only one subplot"):
        frame.plot(subplots=overlapping_groups)
@pytest.mark.parametrize("kind", ("box", "scatter", "hexbin"))
def test_group_subplot_invalid_kind(self, kind):
    """Check grouped ``subplots`` raises ``ValueError`` for these kinds."""
    frame = DataFrame({"a": np.arange(10), "b": np.arange(10)})
    msg = "When subplots is an iterable, kind must be one of"
    with pytest.raises(ValueError, match=msg):
        frame.plot(subplots=[("a", "b")], kind=kind)
@pytest.mark.parametrize(
    "index_name, old_label, new_label",
    [
        (None, "", "new"),
        ("old", "old", "new"),
        (None, "", ""),
        (None, "", 1),
        (None, "", [1, 2]),
    ],
)
@pytest.mark.parametrize("kind", ["line", "area", "bar"])
def test_xlabel_ylabel_dataframe_single_plot(
    self, kind, index_name, old_label, new_label
):
    """Check default axis labels and their override via ``xlabel``/``ylabel``."""
    # GH 9093
    df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"])
    df.index.name = index_name

    # default is the ylabel is not shown and xlabel is index name
    default_ax = df.plot(kind=kind)
    assert default_ax.get_xlabel() == old_label
    assert default_ax.get_ylabel() == ""

    # old xlabel will be overridden and assigned ylabel will be used as ylabel
    labelled_ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label)
    expected = str(new_label)
    assert labelled_ax.get_ylabel() == expected
    assert labelled_ax.get_xlabel() == expected
@pytest.mark.parametrize(
    "xlabel, ylabel",
    [
        (None, None),
        ("X Label", None),
        (None, "Y Label"),
        ("X Label", "Y Label"),
    ],
)
@pytest.mark.parametrize("kind", ["scatter", "hexbin"])
def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel):
    """Check axis labels fall back to the column names when not supplied."""
    # GH 37001
    xcol, ycol = "Type A", "Type B"
    df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol])

    # default is the labels are column names
    expected_x = xlabel
    if xlabel is None:
        expected_x = xcol
    expected_y = ylabel
    if ylabel is None:
        expected_y = ycol

    ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel)
    assert ax.get_xlabel() == expected_x
    assert ax.get_ylabel() == expected_y
@pytest.mark.parametrize("secondary_y", (False, True))
def test_secondary_y(self, secondary_y):
    """Check ylabel, ylim and yticks on every axes with a visible y axis."""
    y_settings = {"ylabel": "Y", "ylim": (0, 100), "yticks": [99]}
    ax_df = DataFrame([0]).plot(secondary_y=secondary_y, **y_settings)
    for ax in ax_df.figure.axes:
        if not ax.yaxis.get_visible():
            # axes with a hidden y axis are not checked
            continue
        assert ax.get_ylabel() == "Y"
        assert ax.get_ylim() == (0, 100)
        assert ax.get_yticks()[0] == 99
@pytest.mark.slow
def test_plot_no_warning(self):
    """Check plotting a frame and its transpose emits no warning."""
    # GH 55138
    # TODO(3.0): this can be removed once Period[B] deprecation is enforced
    values = np.random.default_rng(2).standard_normal((10, 4))
    business_days = date_range("2000-01-01", periods=10, freq="B")
    df = DataFrame(
        values,
        columns=Index(list("ABCD"), dtype=object),
        index=business_days,
    )
    with tm.assert_produces_warning(False):
        for frame in (df, df.T):
            _ = frame.plot()
def _generate_4_axes_via_gridspec():
    """
    Build a 2x2 GridSpec and create one subplot per cell via pyplot.

    Returns the GridSpec together with the axes listed in the order
    top-left, lower-left, top-right, lower-right.
    """
    import matplotlib.pyplot as plt

    gs = mpl.gridspec.GridSpec(2, 2)
    # (row, col) pairs, walked column by column
    cell_order = ((0, 0), (1, 0), (0, 1), (1, 1))
    return gs, [plt.subplot(gs[row, col]) for row, col in cell_order]