You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
217 lines
5.9 KiB
217 lines
5.9 KiB
from datetime import datetime
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import (
|
|
DataFrame,
|
|
Series,
|
|
Timestamp,
|
|
date_range,
|
|
)
|
|
import pandas._testing as tm
|
|
|
|
from pandas.tseries.offsets import BDay
|
|
|
|
|
|
def test_map(float_frame):
    """Check element-wise ``DataFrame.map`` on the ``float_frame`` fixture.

    Parameters
    ----------
    float_frame : DataFrame
        Fixture provided by the test suite. The test doubles every value and
        reads column ``"A"``, so the frame must be numeric and contain that
        column.
    """
    result = float_frame.map(lambda x: x * 2)
    tm.assert_frame_equal(result, float_frame * 2)

    # Smoke check: mapping a callable that returns arbitrary objects
    # must not raise.
    float_frame.map(type)

    # GH 465: function returning tuples
    result = float_frame.map(lambda x: (x, x))["A"].iloc[0]
    assert isinstance(result, tuple)
|
|
|
|
|
|
@pytest.mark.parametrize("val", [1, 1.0])
def test_map_float_object_conversion(val):
    """Mapping the identity over a mixed number/str column yields object dtype.

    Parameters
    ----------
    val : int or float
        Numeric value placed next to the string ``"a"`` in a single column.
    """
    # GH 2909: object conversion to float in constructor?
    df = DataFrame(data=[val, "a"])
    # The only column is labelled 0, so ``dtypes[0]`` picks its dtype.
    result = df.map(lambda x: x).dtypes[0]
    assert result == object
|
|
|
|
|
|
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_keeps_dtype(na_action):
    """``DataFrame.map`` keeps a sparse column sparse, also on a zero-row frame.

    Parameters
    ----------
    na_action : None or "ignore"
        Forwarded unchanged to ``DataFrame.map``.
    """
    # GH52219
    arr = Series(["a", np.nan, "b"])
    sparse_arr = arr.astype(pd.SparseDtype(object))
    df = DataFrame(data={"a": arr, "b": sparse_arr})

    def func(x):
        # Upper-case strings; hand missing values back untouched so the
        # expectation is the same for both ``na_action`` settings.
        return str.upper(x) if not pd.isna(x) else x

    result = df.map(func, na_action=na_action)

    expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object))
    expected_arr = expected_sparse.astype(object)
    expected = DataFrame({"a": expected_arr, "b": expected_sparse})

    tm.assert_frame_equal(result, expected)

    # A zero-row slice must map to the matching zero-row slice of the
    # expectation.
    result_empty = df.iloc[:0, :].map(func, na_action=na_action)
    expected_empty = expected.iloc[:0, :]
    tm.assert_frame_equal(result_empty, expected_empty)
|
|
|
|
|
|
def test_map_str():
|
|
# GH 2786
|
|
df = DataFrame(np.random.default_rng(2).random((3, 4)))
|
|
df2 = df.copy()
|
|
cols = ["a", "a", "a", "a"]
|
|
df.columns = cols
|
|
|
|
expected = df2.map(str)
|
|
expected.columns = cols
|
|
result = df.map(str)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "col, val",
    [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]],
)
def test_map_datetimelike(col, val):
    """``map(str)`` on a datetime-like column matches ``str`` of the element.

    Parameters
    ----------
    col : str
        Name of the column added to the frame.
    val : Timestamp or Timedelta
        Scalar broadcast into that column.
    """
    # datetime/timedelta
    df = DataFrame(np.random.default_rng(2).random((3, 4)))
    df[col] = val
    result = df.map(str)
    assert result.loc[0, col] == str(df.loc[0, col])
|
|
|
|
|
|
@pytest.mark.parametrize(
    "expected",
    [
        DataFrame(),
        DataFrame(columns=list("ABC")),
        DataFrame(index=list("ABC")),
        DataFrame({"A": [], "B": [], "C": []}),
    ],
)
@pytest.mark.parametrize("func", [round, lambda x: x])
def test_map_empty(expected, func):
    """Mapping over an empty frame returns a frame equal to the input.

    Parameters
    ----------
    expected : DataFrame
        Empty frame that serves both as the input and as the expected result.
    func : callable
        Function passed to ``DataFrame.map``.
    """
    # GH 8222
    result = expected.map(func)
    tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_map_kwargs():
|
|
# GH 40652
|
|
result = DataFrame([[1, 2], [3, 4]]).map(lambda x, y: x + y, y=2)
|
|
expected = DataFrame([[3, 4], [5, 6]])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_map_na_ignore(float_frame):
    """``na_action="ignore"`` leaves masked (NA) cells out of the mapping.

    Parameters
    ----------
    float_frame : DataFrame
        Fixture provided by the test suite; a copy of it receives ``pd.NA``
        at the positions selected by a seeded random boolean mask.
    """
    # GH 23803
    strlen_frame = float_frame.map(lambda x: len(str(x)))
    float_frame_with_na = float_frame.copy()
    mask = np.random.default_rng(2).integers(0, 2, size=float_frame.shape, dtype=bool)
    float_frame_with_na[mask] = pd.NA
    strlen_frame_na_ignore = float_frame_with_na.map(
        lambda x: len(str(x)), na_action="ignore"
    )
    # Set float64 type to avoid upcast when setting NA below
    strlen_frame_with_na = strlen_frame.copy().astype("float64")
    strlen_frame_with_na[mask] = pd.NA
    tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
|
|
|
|
|
|
def test_map_box_timestamps():
|
|
# GH 2689, GH 2627
|
|
ser = Series(date_range("1/1/2000", periods=10))
|
|
|
|
def func(x):
|
|
return (x.hour, x.day, x.month)
|
|
|
|
# it works!
|
|
DataFrame(ser).map(func)
|
|
|
|
|
|
def test_map_box():
|
|
# ufunc will not be boxed. Same test cases as the test_map_box
|
|
df = DataFrame(
|
|
{
|
|
"a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")],
|
|
"b": [
|
|
Timestamp("2011-01-01", tz="US/Eastern"),
|
|
Timestamp("2011-01-02", tz="US/Eastern"),
|
|
],
|
|
"c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")],
|
|
"d": [
|
|
pd.Period("2011-01-01", freq="M"),
|
|
pd.Period("2011-01-02", freq="M"),
|
|
],
|
|
}
|
|
)
|
|
|
|
result = df.map(lambda x: type(x).__name__)
|
|
expected = DataFrame(
|
|
{
|
|
"a": ["Timestamp", "Timestamp"],
|
|
"b": ["Timestamp", "Timestamp"],
|
|
"c": ["Timedelta", "Timedelta"],
|
|
"d": ["Period", "Period"],
|
|
}
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_frame_map_dont_convert_datetime64():
|
|
df = DataFrame({"x1": [datetime(1996, 1, 1)]})
|
|
|
|
df = df.map(lambda x: x + BDay())
|
|
df = df.map(lambda x: x + BDay())
|
|
|
|
result = df.x1.dtype
|
|
assert result == "M8[ns]"
|
|
|
|
|
|
def test_map_function_runs_once():
|
|
df = DataFrame({"a": [1, 2, 3]})
|
|
values = [] # Save values function is applied to
|
|
|
|
def reducing_function(val):
|
|
values.append(val)
|
|
|
|
def non_reducing_function(val):
|
|
values.append(val)
|
|
return val
|
|
|
|
for func in [reducing_function, non_reducing_function]:
|
|
del values[:]
|
|
|
|
df.map(func)
|
|
assert values == df.a.to_list()
|
|
|
|
|
|
def test_map_type():
|
|
# GH 46719
|
|
df = DataFrame(
|
|
{"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]},
|
|
index=["a", "b", "c"],
|
|
)
|
|
|
|
result = df.map(type)
|
|
expected = DataFrame(
|
|
{"col1": [int, str, type], "col2": [float, datetime, float]},
|
|
index=["a", "b", "c"],
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_map_invalid_na_action(float_frame):
    """An unsupported ``na_action`` value raises ``ValueError`` naming the value.

    Parameters
    ----------
    float_frame : DataFrame
        Fixture provided by the test suite; only used as the receiver of
        ``map``.
    """
    # GH 23803
    with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
        float_frame.map(lambda x: len(str(x)), na_action="abc")
|
|
|
|
|
|
def test_applymap_deprecated():
|
|
# GH52353
|
|
df = DataFrame({"a": [1, 2, 3]})
|
|
msg = "DataFrame.applymap has been deprecated. Use DataFrame.map instead."
|
|
with tm.assert_produces_warning(FutureWarning, match=msg):
|
|
df.applymap(lambda x: x)
|