You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

428 lines
13 KiB

from datetime import datetime
import operator
import numpy as np
import pytest
from pandas import (
Series,
_testing as tm,
)
def test_title(any_string_dtype):
s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
result = s.str.title()
expected = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
def test_title_mixed_object():
    """Check ``Series.str.title`` on an object Series mixing strings and non-strings.

    The expected Series has NaN at every position whose input is not a
    string (bool, datetime, int, float), and ``None`` where the input is
    ``None``.
    """
    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])

    result = s.str.title()

    expected = Series(
        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_almost_equal(result, expected)
def test_lower_upper(any_string_dtype):
s = Series(["om", np.nan, "nom", "nom"], dtype=any_string_dtype)
result = s.str.upper()
expected = Series(["OM", np.nan, "NOM", "NOM"], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
result = result.str.lower()
tm.assert_series_equal(result, s)
def test_lower_upper_mixed_object():
    """Check ``str.upper`` / ``str.lower`` on an object Series with mixed values.

    The expected Series have NaN at every non-string position and ``None``
    where the input is ``None``.
    """
    s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])

    result = s.str.upper()
    expected = Series(
        ["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan], dtype=object
    )
    tm.assert_series_equal(result, expected)

    result = s.str.lower()
    expected = Series(
        ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object
    )
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "data, expected",
    [
        (
            ["FOO", "BAR", np.nan, "Blah", "blurg"],
            ["Foo", "Bar", np.nan, "Blah", "Blurg"],
        ),
        (["a", "b", "c"], ["A", "B", "C"]),
        (["a b", "a bc. de"], ["A b", "A bc. de"]),
    ],
)
def test_capitalize(data, expected, any_string_dtype):
    """Check ``Series.str.capitalize`` against the parametrized input/output pairs.

    ``data`` and ``expected`` are the raw value lists from the parametrize
    table above; ``any_string_dtype`` is the dtype applied to both Series
    and is expected to be supplied by a pytest fixture defined elsewhere.
    """
    s = Series(data, dtype=any_string_dtype)

    result = s.str.capitalize()

    expected = Series(expected, dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
def test_capitalize_mixed_object():
    """Check ``Series.str.capitalize`` on an object Series with mixed values.

    The expected Series has NaN at every non-string position and ``None``
    where the input is ``None``.
    """
    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])

    result = s.str.capitalize()

    expected = Series(
        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_swapcase(any_string_dtype):
s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
result = s.str.swapcase()
expected = Series(["foo", "bar", np.nan, "bLAH", "BLURG"], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
def test_swapcase_mixed_object():
    """Check ``Series.str.swapcase`` on an object Series with mixed values.

    The expected Series has NaN at every non-string position and ``None``
    where the input is ``None``.
    """
    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0])

    result = s.str.swapcase()

    expected = Series(
        ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_casefold():
    """Check that ``Series.str.casefold`` folds the German sharp s to "ss"."""
    # GH25405
    expected = Series(["ss", np.nan, "case", "ssd"])
    s = Series(["ß", np.nan, "case", "ßd"])

    result = s.str.casefold()

    tm.assert_series_equal(result, expected)
def test_casemethods(any_string_dtype):
values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"]
s = Series(values, dtype=any_string_dtype)
assert s.str.lower().tolist() == [v.lower() for v in values]
assert s.str.upper().tolist() == [v.upper() for v in values]
assert s.str.title().tolist() == [v.title() for v in values]
assert s.str.capitalize().tolist() == [v.capitalize() for v in values]
assert s.str.swapcase().tolist() == [v.swapcase() for v in values]
def test_pad(any_string_dtype):
s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
result = s.str.pad(5, side="left")
expected = Series(
[" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
result = s.str.pad(5, side="right")
expected = Series(
["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
result = s.str.pad(5, side="both")
expected = Series(
[" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
def test_pad_mixed_object():
    """Check ``Series.str.pad`` to width 5 on an object Series with mixed values.

    The expected Series have NaN at every non-string position and ``None``
    where the input is ``None``.
    """
    s = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0])

    result = s.str.pad(5, side="left")
    expected = Series(
        ["    a", np.nan, "    b", np.nan, np.nan, "   ee", None, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.pad(5, side="right")
    expected = Series(
        ["a    ", np.nan, "b    ", np.nan, np.nan, "ee   ", None, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    # "ee" needs three fill spaces; centering puts two on the left and one
    # on the right, matching "ee".center(5).
    result = s.str.pad(5, side="both")
    expected = Series(
        ["  a  ", np.nan, "  b  ", np.nan, np.nan, "  ee ", None, np.nan, np.nan],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_pad_fillchar(any_string_dtype):
s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
result = s.str.pad(5, side="left", fillchar="X")
expected = Series(
["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
result = s.str.pad(5, side="right", fillchar="X")
expected = Series(
["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
result = s.str.pad(5, side="both", fillchar="X")
expected = Series(
["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
def test_pad_fillchar_bad_arg_raises(any_string_dtype):
    """Check that ``Series.str.pad`` rejects an invalid ``fillchar`` with TypeError.

    Two invalid values are exercised: a two-character string and an int.
    ``any_string_dtype`` is the dtype of the Series under test; it is
    expected to be supplied by a pytest fixture defined elsewhere.
    """
    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)

    # A string longer than one character is not a valid fill character.
    msg = "fillchar must be a character, not str"
    with pytest.raises(TypeError, match=msg):
        s.str.pad(5, fillchar="XY")

    # A non-string fill character is rejected as well.
    msg = "fillchar must be a character, not int"
    with pytest.raises(TypeError, match=msg):
        s.str.pad(5, fillchar=5)
@pytest.mark.parametrize("method_name", ["center", "ljust", "rjust", "zfill", "pad"])
def test_pad_width_bad_arg_raises(method_name, any_string_dtype):
    """Check that each padding method raises TypeError for a non-integer width.

    ``method_name`` is the name of the ``Series.str`` method to call;
    ``any_string_dtype`` is the dtype of the Series under test and is
    expected to be supplied by a pytest fixture defined elsewhere.
    """
    # see gh-13598
    s = Series(["1", "22", "a", "bb"], dtype=any_string_dtype)

    # Build a caller that invokes ``<accessor>.<method_name>("f")``, i.e.
    # passes a string where the width is expected.
    op = operator.methodcaller(method_name, "f")

    msg = "width must be of integer type, not str"
    with pytest.raises(TypeError, match=msg):
        op(s.str)
def test_center_ljust_rjust(any_string_dtype):
s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
result = s.str.center(5)
expected = Series(
[" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
result = s.str.ljust(5)
expected = Series(
["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
result = s.str.rjust(5)
expected = Series(
[" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
def test_center_ljust_rjust_mixed_object():
    """Check ``center`` / ``ljust`` / ``rjust`` (width 5) on mixed object data.

    The expected Series have NaN at every non-string position and ``None``
    where the input is ``None``.
    """
    s = Series(["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0])

    # "eee" needs two fill spaces, one per side; one-character values need
    # four, two per side.
    result = s.str.center(5)
    expected = Series(
        [
            "  a  ",
            np.nan,
            "  b  ",
            np.nan,
            np.nan,
            "  c  ",
            " eee ",
            None,
            np.nan,
            np.nan,
        ],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.ljust(5)
    expected = Series(
        [
            "a    ",
            np.nan,
            "b    ",
            np.nan,
            np.nan,
            "c    ",
            "eee  ",
            None,
            np.nan,
            np.nan,
        ],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

    result = s.str.rjust(5)
    expected = Series(
        [
            "    a",
            np.nan,
            "    b",
            np.nan,
            np.nan,
            "    c",
            "  eee",
            None,
            np.nan,
            np.nan,
        ],
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
def test_center_ljust_rjust_fillchar(any_string_dtype):
if any_string_dtype == "string[pyarrow_numpy]":
pytest.skip(
"Arrow logic is different, "
"see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
)
s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
result = s.str.center(5, fillchar="X")
expected = Series(
["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
expected = np.array([v.center(5, "X") for v in np.array(s)], dtype=np.object_)
tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
result = s.str.ljust(5, fillchar="X")
expected = Series(
["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
expected = np.array([v.ljust(5, "X") for v in np.array(s)], dtype=np.object_)
tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
result = s.str.rjust(5, fillchar="X")
expected = Series(
["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
expected = np.array([v.rjust(5, "X") for v in np.array(s)], dtype=np.object_)
tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
def test_center_ljust_rjust_fillchar_bad_arg_raises(any_string_dtype):
    """Check that ``center`` / ``ljust`` / ``rjust`` reject an invalid ``fillchar``.

    Each method is called with a two-character string and with an int, and
    must raise TypeError with a message naming the offending type.
    ``any_string_dtype`` is the dtype of the Series under test; it is
    expected to be supplied by a pytest fixture defined elsewhere.
    """
    s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)

    # If fillchar is not a character, normal str raises TypeError
    # 'aaa'.ljust(5, 'XY')
    # TypeError: must be char, not str
    template = "fillchar must be a character, not {dtype}"

    with pytest.raises(TypeError, match=template.format(dtype="str")):
        s.str.center(5, fillchar="XY")

    with pytest.raises(TypeError, match=template.format(dtype="str")):
        s.str.ljust(5, fillchar="XY")

    with pytest.raises(TypeError, match=template.format(dtype="str")):
        s.str.rjust(5, fillchar="XY")

    with pytest.raises(TypeError, match=template.format(dtype="int")):
        s.str.center(5, fillchar=1)

    with pytest.raises(TypeError, match=template.format(dtype="int")):
        s.str.ljust(5, fillchar=1)

    with pytest.raises(TypeError, match=template.format(dtype="int")):
        s.str.rjust(5, fillchar=1)
def test_zfill(any_string_dtype):
s = Series(["1", "22", "aaa", "333", "45678"], dtype=any_string_dtype)
result = s.str.zfill(5)
expected = Series(
["00001", "00022", "00aaa", "00333", "45678"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
expected = np.array([v.zfill(5) for v in np.array(s)], dtype=np.object_)
tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
result = s.str.zfill(3)
expected = Series(["001", "022", "aaa", "333", "45678"], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
expected = np.array([v.zfill(3) for v in np.array(s)], dtype=np.object_)
tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
s = Series(["1", np.nan, "aaa", np.nan, "45678"], dtype=any_string_dtype)
result = s.str.zfill(5)
expected = Series(
["00001", np.nan, "00aaa", np.nan, "45678"], dtype=any_string_dtype
)
tm.assert_series_equal(result, expected)
def test_wrap(any_string_dtype):
# test values are: two words less than width, two words equal to width,
# two words greater than width, one word less than width, one word
# equal to width, one word greater than width, multiple tokens with
# trailing whitespace equal to width
s = Series(
[
"hello world",
"hello world!",
"hello world!!",
"abcdefabcde",
"abcdefabcdef",
"abcdefabcdefa",
"ab ab ab ab ",
"ab ab ab ab a",
"\t",
],
dtype=any_string_dtype,
)
# expected values
expected = Series(
[
"hello world",
"hello world!",
"hello\nworld!!",
"abcdefabcde",
"abcdefabcdef",
"abcdefabcdef\na",
"ab ab ab ab",
"ab ab ab ab\na",
"",
],
dtype=any_string_dtype,
)
result = s.str.wrap(12, break_long_words=True)
tm.assert_series_equal(result, expected)
def test_wrap_unicode(any_string_dtype):
# test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
s = Series(
[" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"], dtype=any_string_dtype
)
expected = Series(
[" pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"], dtype=any_string_dtype
)
result = s.str.wrap(6)
tm.assert_series_equal(result, expected)