You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
78 lines
2.0 KiB
78 lines
2.0 KiB
import numpy as np
|
|
import pytest
|
|
|
|
from pandas import (
|
|
NA,
|
|
Categorical,
|
|
Series,
|
|
)
|
|
import pandas._testing as tm
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"keep, expected",
|
|
[
|
|
("first", Series([False, False, True, False, True], name="name")),
|
|
("last", Series([True, True, False, False, False], name="name")),
|
|
(False, Series([True, True, True, False, True], name="name")),
|
|
],
|
|
)
|
|
def test_duplicated_keep(keep, expected):
|
|
ser = Series(["a", "b", "b", "c", "a"], name="name")
|
|
|
|
result = ser.duplicated(keep=keep)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"keep, expected",
|
|
[
|
|
("first", Series([False, False, True, False, True])),
|
|
("last", Series([True, True, False, False, False])),
|
|
(False, Series([True, True, True, False, True])),
|
|
],
|
|
)
|
|
def test_duplicated_nan_none(keep, expected):
|
|
ser = Series([np.nan, 3, 3, None, np.nan], dtype=object)
|
|
|
|
result = ser.duplicated(keep=keep)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_duplicated_categorical_bool_na(nulls_fixture):
|
|
# GH#44351
|
|
ser = Series(
|
|
Categorical(
|
|
[True, False, True, False, nulls_fixture],
|
|
categories=[True, False],
|
|
ordered=True,
|
|
)
|
|
)
|
|
result = ser.duplicated()
|
|
expected = Series([False, False, True, True, False])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"keep, vals",
|
|
[
|
|
("last", [True, True, False]),
|
|
("first", [False, True, True]),
|
|
(False, [True, True, True]),
|
|
],
|
|
)
|
|
def test_duplicated_mask(keep, vals):
|
|
# GH#48150
|
|
ser = Series([1, 2, NA, NA, NA], dtype="Int64")
|
|
result = ser.duplicated(keep=keep)
|
|
expected = Series([False, False] + vals)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_duplicated_mask_no_duplicated_na(keep):
|
|
# GH#48150
|
|
ser = Series([1, 2, NA], dtype="Int64")
|
|
result = ser.duplicated(keep=keep)
|
|
expected = Series([False, False, False])
|
|
tm.assert_series_equal(result, expected)
|