You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
2.9 KiB

7 months ago
import numpy as np
from pandas import (
DataFrame,
date_range,
)
import pandas._testing as tm
class TestEquals:
def test_dataframe_not_equal(self):
# see GH#28839
df1 = DataFrame({"a": [1, 2], "b": ["s", "d"]})
df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
assert df1.equals(df2) is False
def test_equals_different_blocks(self, using_array_manager, using_infer_string):
# GH#9330
df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
df1 = df0.reset_index()[["A", "B", "C"]]
if not using_array_manager and not using_infer_string:
# this assert verifies that the above operations have
# induced a block rearrangement
assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
# do the real tests
tm.assert_frame_equal(df0, df1)
assert df0.equals(df1)
assert df1.equals(df0)
def test_equals(self):
# Add object dtype column with nans
index = np.random.default_rng(2).random(10)
df1 = DataFrame(
np.random.default_rng(2).random(10), index=index, columns=["floats"]
)
df1["text"] = "the sky is so blue. we could use more chocolate.".split()
df1["start"] = date_range("2000-1-1", periods=10, freq="min")
df1["end"] = date_range("2000-1-1", periods=10, freq="D")
df1["diff"] = df1["end"] - df1["start"]
# Explicitly cast to object, to avoid implicit cast when setting np.nan
df1["bool"] = (np.arange(10) % 3 == 0).astype(object)
df1.loc[::2] = np.nan
df2 = df1.copy()
assert df1["text"].equals(df2["text"])
assert df1["start"].equals(df2["start"])
assert df1["end"].equals(df2["end"])
assert df1["diff"].equals(df2["diff"])
assert df1["bool"].equals(df2["bool"])
assert df1.equals(df2)
assert not df1.equals(object)
# different dtype
different = df1.copy()
different["floats"] = different["floats"].astype("float32")
assert not df1.equals(different)
# different index
different_index = -index
different = df2.set_index(different_index)
assert not df1.equals(different)
# different columns
different = df2.copy()
different.columns = df2.columns[::-1]
assert not df1.equals(different)
# DatetimeIndex
index = date_range("2000-1-1", periods=10, freq="min")
df1 = df1.set_index(index)
df2 = df1.copy()
assert df1.equals(df2)
# MultiIndex
df3 = df1.set_index(["text"], append=True)
df2 = df1.set_index(["text"], append=True)
assert df3.equals(df2)
df2 = df1.set_index(["floats"], append=True)
assert not df3.equals(df2)
# NaN in index
df3 = df1.set_index(["floats"], append=True)
df2 = df1.set_index(["floats"], append=True)
assert df3.equals(df2)