You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
424 lines
13 KiB
424 lines
13 KiB
6 months ago
|
from textwrap import dedent
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas.errors import (
|
||
|
PyperclipException,
|
||
|
PyperclipWindowsException,
|
||
|
)
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
NA,
|
||
|
DataFrame,
|
||
|
Series,
|
||
|
get_option,
|
||
|
read_clipboard,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.core.arrays import (
|
||
|
ArrowStringArray,
|
||
|
StringArray,
|
||
|
)
|
||
|
|
||
|
from pandas.io.clipboard import (
|
||
|
CheckedCall,
|
||
|
_stringifyText,
|
||
|
init_qt_clipboard,
|
||
|
)
|
||
|
|
||
|
|
||
|
def build_kwargs(sep, excel):
    """
    Build the keyword arguments to forward to ``to_clipboard``.

    Parameters
    ----------
    sep : object
        Separator to pass through, or the string ``"default"`` to leave the
        ``sep`` keyword out entirely.
    excel : object
        Excel flag to pass through, or the string ``"default"`` to leave the
        ``excel`` keyword out entirely.

    Returns
    -------
    dict
        Contains ``"excel"`` and/or ``"sep"`` (in that order) for every
        argument that is not the ``"default"`` sentinel.
    """
    # "default" is a sentinel: None is itself a meaningful value for both
    # keywords, so it cannot be used to signal "omit this keyword".
    candidates = {"excel": excel, "sep": sep}
    return {name: value for name, value in candidates.items() if value != "default"}
|
||
|
|
||
|
|
||
|
@pytest.fixture(
    params=[
        "delims",
        "utf8",
        "utf16",
        "string",
        "long",
        "nonascii",
        "colwidth",
        "mixed",
        "float",
        "int",
    ]
)
def df(request):
    """
    Parametrized fixture yielding one DataFrame per data flavour.

    ``request.param`` selects which frame is built: strings containing
    delimiter/quote characters, non-ASCII text, wide unicode, a frame one row
    longer than ``display.max_rows``, cells one character wider than
    ``display.max_colwidth``, and plain mixed/float/int data.

    Raises
    ------
    ValueError
        If ``request.param`` is not one of the names handled below.
    """
    data_type = request.param

    if data_type == "delims":
        return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]})
    elif data_type == "utf8":
        return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]})
    elif data_type == "utf16":
        return DataFrame(
            {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]}
        )
    elif data_type == "string":
        return DataFrame(
            np.array([f"i-{i}" for i in range(15)]).reshape(5, 3), columns=list("abc")
        )
    elif data_type == "long":
        # One row more than the display limit.
        max_rows = get_option("display.max_rows")
        return DataFrame(
            np.random.default_rng(2).integers(0, 10, size=(max_rows + 1, 3)),
            columns=list("abc"),
        )
    elif data_type == "nonascii":
        return DataFrame({"en": "in English".split(), "es": "en español".split()})
    elif data_type == "colwidth":
        # Each cell is one character wider than the display column width.
        _cw = get_option("display.max_colwidth") + 1
        return DataFrame(
            np.array(["x" * _cw for _ in range(15)]).reshape(5, 3), columns=list("abc")
        )
    elif data_type == "mixed":
        return DataFrame(
            {
                "a": np.arange(1.0, 6.0) + 0.01,
                "b": np.arange(1, 6).astype(np.int64),
                "c": list("abcde"),
            }
        )
    elif data_type == "float":
        return DataFrame(np.random.default_rng(2).random((5, 3)), columns=list("abc"))
    elif data_type == "int":
        return DataFrame(
            np.random.default_rng(2).integers(0, 10, (5, 3)), columns=list("abc")
        )
    else:
        # Only reachable if a new name is added to ``params`` without a branch.
        raise ValueError(f"Unhandled df fixture param: {data_type!r}")
|
||
|
|
||
|
|
||
|
@pytest.fixture
def mock_ctypes(monkeypatch):
    """
    Mocks WinError to help with testing the clipboard.

    For the duration of the test, ``ctypes.WinError`` is replaced by a
    callable that returns the string ``"Window Error"``.
    """

    def _mock_win_error():
        return "Window Error"

    # Set raising to False because WinError won't exist on non-windows platforms
    with monkeypatch.context() as m:
        m.setattr("ctypes.WinError", _mock_win_error, raising=False)
        yield
|
||
|
|
||
|
|
||
|
@pytest.mark.usefixtures("mock_ctypes")
def test_checked_call_with_bad_call(monkeypatch):
    """
    Give CheckedCall a function that returns a falsey value and
    mock get_errno so it returns True, so that an exception is raised.
    """

    def _return_false():
        return False

    monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: True)
    # Parentheses are escaped because ``match`` is applied as a regex.
    msg = f"Error calling {_return_false.__name__} \\(Window Error\\)"

    with pytest.raises(PyperclipWindowsException, match=msg):
        CheckedCall(_return_false)()
|
||
|
|
||
|
|
||
|
@pytest.mark.usefixtures("mock_ctypes")
def test_checked_call_with_valid_call(monkeypatch):
    """
    Give CheckedCall a function that returns a truthy value and
    mock get_errno so it returns False, so that no exception is raised.
    The call should return the result from _return_true.
    """

    def _return_true():
        return True

    monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: False)

    # Give CheckedCall a callable that returns a truthy value
    checked_call = CheckedCall(_return_true)
    assert checked_call() is True
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "text",
    [
        "String_test",
        True,
        1,
        1.0,
        1j,
    ],
)
def test_stringify_text(text):
    """
    Check _stringifyText on str/int/float/bool inputs and on a rejected type.

    Inputs of an accepted type must come back as ``str(text)``; the complex
    value ``1j`` is the one parameter outside ``valid_types`` and must raise
    PyperclipException.
    """
    valid_types = (str, int, float, bool)

    if isinstance(text, valid_types):
        result = _stringifyText(text)
        assert result == str(text)
    else:
        msg = (
            "only str, int, float, and bool values "
            f"can be copied to the clipboard, not {type(text).__name__}"
        )
        with pytest.raises(PyperclipException, match=msg):
            _stringifyText(text)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def set_pyqt_clipboard(monkeypatch):
    """
    Swap in the Qt clipboard functions for the duration of a test.

    The pair returned by ``init_qt_clipboard()`` replaces
    ``pd.io.clipboard.clipboard_set`` and ``pd.io.clipboard.clipboard_get``;
    the originals are restored when the monkeypatch context exits.
    """
    qt_cut, qt_paste = init_qt_clipboard()
    with monkeypatch.context() as m:
        m.setattr(pd.io.clipboard, "clipboard_set", qt_cut)
        m.setattr(pd.io.clipboard, "clipboard_get", qt_paste)
        yield
|
||
|
|
||
|
|
||
|
@pytest.fixture
def clipboard(qapp):
    """
    Yield the clipboard object of the ``qapp`` fixture and clear it afterwards.
    """
    clip = qapp.clipboard()
    yield clip
    # Teardown: do not leak clipboard contents into the next test.
    clip.clear()
|
||
|
|
||
|
|
||
|
@pytest.mark.single_cpu
@pytest.mark.clipboard
@pytest.mark.usefixtures("set_pyqt_clipboard")
@pytest.mark.usefixtures("clipboard")
class TestClipboard:
    """
    Tests for ``DataFrame.to_clipboard`` and ``read_clipboard``.

    Every test runs with the ``set_pyqt_clipboard`` and ``clipboard``
    fixtures active (see the class decorators).
    """

    # Test that default arguments copy as tab delimited
    # Test that explicit delimiters are respected
    @pytest.mark.parametrize("sep", [None, "\t", ",", "|"])
    @pytest.mark.parametrize("encoding", [None, "UTF-8", "utf-8", "utf8"])
    def test_round_trip_frame_sep(self, df, sep, encoding):
        df.to_clipboard(excel=None, sep=sep, encoding=encoding)
        # sep=None on the write side is read back as tab-delimited.
        result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding)
        tm.assert_frame_equal(df, result)

    # Test white space separator
    def test_round_trip_frame_string(self, df):
        df.to_clipboard(excel=False, sep=None)
        result = read_clipboard()
        assert df.to_string() == result.to_string()
        assert df.shape == result.shape

    # Two character separator is not supported in to_clipboard
    # Test that multi-character separators are not silently passed
    def test_excel_sep_warning(self, df):
        with tm.assert_produces_warning(
            UserWarning,
            match="to_clipboard in excel mode requires a single character separator.",
            check_stacklevel=False,
        ):
            # r"\t" is the two characters backslash + t, not a tab.
            df.to_clipboard(excel=True, sep=r"\t")

    # Separator is ignored when excel=False and should produce a warning
    def test_copy_delim_warning(self, df):
        with tm.assert_produces_warning():
            df.to_clipboard(excel=False, sep="\t")

    # Tests that the default behavior of to_clipboard is tab
    # delimited and excel="True"
    @pytest.mark.parametrize("sep", ["\t", None, "default"])
    @pytest.mark.parametrize("excel", [True, None, "default"])
    def test_clipboard_copy_tabs_default(self, sep, excel, df, clipboard):
        kwargs = build_kwargs(sep, excel)
        df.to_clipboard(**kwargs)
        assert clipboard.text() == df.to_csv(sep="\t")

    # Tests reading of white space separated tables
    @pytest.mark.parametrize("sep", [None, "default"])
    def test_clipboard_copy_strings(self, sep, df):
        kwargs = build_kwargs(sep, False)
        df.to_clipboard(**kwargs)
        result = read_clipboard(sep=r"\s+")
        assert result.to_string() == df.to_string()
        assert df.shape == result.shape

    def test_read_clipboard_infer_excel(self, clipboard):
        # gh-19010: avoid warnings
        clip_kwargs = {"engine": "python"}

        text = dedent(
            """
            John James\tCharlie Mingus
            1\t2
            4\tHarry Carney
            """.strip()
        )
        clipboard.setText(text)
        df = read_clipboard(**clip_kwargs)

        # excel data is parsed correctly
        assert df.iloc[1, 1] == "Harry Carney"

        # having diff tab counts doesn't trigger it
        text = dedent(
            """
            a\t b
            1 2
            3 4
            """.strip()
        )
        clipboard.setText(text)
        res = read_clipboard(**clip_kwargs)

        text = dedent(
            """
            a b
            1 2
            3 4
            """.strip()
        )
        clipboard.setText(text)
        exp = read_clipboard(**clip_kwargs)

        tm.assert_frame_equal(res, exp)

    def test_infer_excel_with_nulls(self, clipboard):
        # GH41108
        text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen"

        clipboard.setText(text)
        df = read_clipboard()
        df_expected = DataFrame(
            data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
        )

        # excel data is parsed correctly
        tm.assert_frame_equal(df, df_expected)

    @pytest.mark.parametrize(
        "multiindex",
        [
            (  # Can't use `dedent` here as it will remove the leading `\t`
                "\n".join(
                    [
                        "\t\t\tcol1\tcol2",
                        "A\t0\tTrue\t1\tred",
                        "A\t1\tTrue\t\tblue",
                        "B\t0\tFalse\t2\tgreen",
                    ]
                ),
                [["A", "A", "B"], [0, 1, 0], [True, True, False]],
            ),
            (
                "\n".join(
                    ["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"]
                ),
                [["A", "A", "B"], [0, 1, 0]],
            ),
        ],
    )
    def test_infer_excel_with_multiindex(self, clipboard, multiindex):
        # GH41108
        # multiindex is a (clipboard text, expected index levels) pair.

        clipboard.setText(multiindex[0])
        df = read_clipboard()
        df_expected = DataFrame(
            data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
            index=multiindex[1],
        )

        # excel data is parsed correctly
        tm.assert_frame_equal(df, df_expected)

    def test_invalid_encoding(self, df):
        msg = "clipboard only supports utf-8 encoding"
        # test case for testing invalid encoding
        with pytest.raises(ValueError, match=msg):
            df.to_clipboard(encoding="ascii")
        with pytest.raises(NotImplementedError, match=msg):
            read_clipboard(encoding="ascii")

    @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑`...", "abcd..."])
    def test_raw_roundtrip(self, data):
        # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
        df = DataFrame({"data": [data]})
        df.to_clipboard()
        result = read_clipboard()
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_read_clipboard_dtype_backend(
        self, clipboard, string_storage, dtype_backend, engine
    ):
        # GH#50502
        from pandas.arrays import ArrowExtensionArray

        if string_storage == "pyarrow" or dtype_backend == "pyarrow":
            pa = pytest.importorskip("pyarrow")

        if string_storage == "python":
            string_array = StringArray(np.array(["x", "y"], dtype=np.object_))
            string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))

        elif dtype_backend == "pyarrow" and engine != "c":
            # ``pa`` is already bound: dtype_backend == "pyarrow" took the
            # importorskip branch above.
            string_array = ArrowExtensionArray(pa.array(["x", "y"]))
            string_array_na = ArrowExtensionArray(pa.array(["x", None]))

        else:
            string_array = ArrowStringArray(pa.array(["x", "y"]))
            string_array_na = ArrowStringArray(pa.array(["x", None]))

        text = """a,b,c,d,e,f,g,h,i
x,1,4.0,x,2,4.0,,True,False
y,2,5.0,,,,,False,"""
        clipboard.setText(text)

        with pd.option_context("mode.string_storage", string_storage):
            result = read_clipboard(sep=",", dtype_backend=dtype_backend, engine=engine)

        expected = DataFrame(
            {
                "a": string_array,
                "b": Series([1, 2], dtype="Int64"),
                "c": Series([4.0, 5.0], dtype="Float64"),
                "d": string_array_na,
                "e": Series([2, NA], dtype="Int64"),
                "f": Series([4.0, NA], dtype="Float64"),
                "g": Series([NA, NA], dtype="Int64"),
                "h": Series([True, False], dtype="boolean"),
                "i": Series([False, NA], dtype="boolean"),
            }
        )
        if dtype_backend == "pyarrow":
            # Rebuild every expected column as a pyarrow-backed array.
            expected = DataFrame(
                {
                    col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
                    for col in expected.columns
                }
            )
            # Column "g" is entirely missing; build it straight from Nones.
            expected["g"] = ArrowExtensionArray(pa.array([None, None]))

        tm.assert_frame_equal(result, expected)

    def test_invalid_dtype_backend(self):
        msg = (
            "dtype_backend numpy is invalid, only 'numpy_nullable' and "
            "'pyarrow' are allowed."
        )
        with pytest.raises(ValueError, match=msg):
            read_clipboard(dtype_backend="numpy")

    def test_to_clipboard_pos_args_deprecation(self):
        # GH-54229
        df = DataFrame({"a": [1, 2, 3]})
        msg = (
            r"Starting with pandas version 3.0 all arguments of to_clipboard "
            r"will be keyword-only."
        )
        with tm.assert_produces_warning(FutureWarning, match=msg):
            df.to_clipboard(True, None)
|