You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
381 lines
15 KiB
381 lines
15 KiB
import numpy as np
|
|
import pytest
|
|
|
|
import pandas._testing as tm
|
|
from pandas.core.indexes.api import Index
|
|
|
|
|
|
class TestJoinInt64Index:
|
|
def test_join_non_unique(self):
|
|
left = Index([4, 4, 3, 3])
|
|
|
|
joined, lidx, ridx = left.join(left, return_indexers=True)
|
|
|
|
exp_joined = Index([4, 4, 4, 4, 3, 3, 3, 3])
|
|
tm.assert_index_equal(joined, exp_joined)
|
|
|
|
exp_lidx = np.array([0, 0, 1, 1, 2, 2, 3, 3], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(lidx, exp_lidx)
|
|
|
|
exp_ridx = np.array([0, 1, 0, 1, 2, 3, 2, 3], dtype=np.intp)
|
|
tm.assert_numpy_array_equal(ridx, exp_ridx)
|
|
|
|
def test_join_inner(self):
|
|
index = Index(range(0, 20, 2), dtype=np.int64)
|
|
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
|
|
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
|
|
|
|
# not monotonic
|
|
res, lidx, ridx = index.join(other, how="inner", return_indexers=True)
|
|
|
|
# no guarantee of sortedness, so sort for comparison purposes
|
|
ind = res.argsort()
|
|
res = res.take(ind)
|
|
lidx = lidx.take(ind)
|
|
ridx = ridx.take(ind)
|
|
|
|
eres = Index([2, 12], dtype=np.int64)
|
|
elidx = np.array([1, 6], dtype=np.intp)
|
|
eridx = np.array([4, 1], dtype=np.intp)
|
|
|
|
assert isinstance(res, Index) and res.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
# monotonic
|
|
res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True)
|
|
|
|
res2 = index.intersection(other_mono)
|
|
tm.assert_index_equal(res, res2)
|
|
|
|
elidx = np.array([1, 6], dtype=np.intp)
|
|
eridx = np.array([1, 4], dtype=np.intp)
|
|
assert isinstance(res, Index) and res.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
def test_join_left(self):
|
|
index = Index(range(0, 20, 2), dtype=np.int64)
|
|
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
|
|
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
|
|
|
|
# not monotonic
|
|
res, lidx, ridx = index.join(other, how="left", return_indexers=True)
|
|
eres = index
|
|
eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp)
|
|
|
|
assert isinstance(res, Index) and res.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
assert lidx is None
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
# monotonic
|
|
res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True)
|
|
eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp)
|
|
assert isinstance(res, Index) and res.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
assert lidx is None
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
# non-unique
|
|
idx = Index([1, 1, 2, 5])
|
|
idx2 = Index([1, 2, 5, 7, 9])
|
|
res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True)
|
|
eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2
|
|
eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
|
|
elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
def test_join_right(self):
|
|
index = Index(range(0, 20, 2), dtype=np.int64)
|
|
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
|
|
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
|
|
|
|
# not monotonic
|
|
res, lidx, ridx = index.join(other, how="right", return_indexers=True)
|
|
eres = other
|
|
elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp)
|
|
|
|
assert isinstance(other, Index) and other.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
assert ridx is None
|
|
|
|
# monotonic
|
|
res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True)
|
|
eres = other_mono
|
|
elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp)
|
|
assert isinstance(other, Index) and other.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
assert ridx is None
|
|
|
|
# non-unique
|
|
idx = Index([1, 1, 2, 5])
|
|
idx2 = Index([1, 2, 5, 7, 9])
|
|
res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True)
|
|
eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2
|
|
elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
|
|
eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
def test_join_non_int_index(self):
|
|
index = Index(range(0, 20, 2), dtype=np.int64)
|
|
other = Index([3, 6, 7, 8, 10], dtype=object)
|
|
|
|
outer = index.join(other, how="outer")
|
|
outer2 = other.join(index, how="outer")
|
|
expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
|
|
tm.assert_index_equal(outer, outer2)
|
|
tm.assert_index_equal(outer, expected)
|
|
|
|
inner = index.join(other, how="inner")
|
|
inner2 = other.join(index, how="inner")
|
|
expected = Index([6, 8, 10])
|
|
tm.assert_index_equal(inner, inner2)
|
|
tm.assert_index_equal(inner, expected)
|
|
|
|
left = index.join(other, how="left")
|
|
tm.assert_index_equal(left, index.astype(object))
|
|
|
|
left2 = other.join(index, how="left")
|
|
tm.assert_index_equal(left2, other)
|
|
|
|
right = index.join(other, how="right")
|
|
tm.assert_index_equal(right, other)
|
|
|
|
right2 = other.join(index, how="right")
|
|
tm.assert_index_equal(right2, index.astype(object))
|
|
|
|
def test_join_outer(self):
|
|
index = Index(range(0, 20, 2), dtype=np.int64)
|
|
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
|
|
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
|
|
|
|
# not monotonic
|
|
# guarantee of sortedness
|
|
res, lidx, ridx = index.join(other, how="outer", return_indexers=True)
|
|
noidx_res = index.join(other, how="outer")
|
|
tm.assert_index_equal(res, noidx_res)
|
|
|
|
eres = Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25], dtype=np.int64)
|
|
elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp)
|
|
eridx = np.array(
|
|
[-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp
|
|
)
|
|
|
|
assert isinstance(res, Index) and res.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
# monotonic
|
|
res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True)
|
|
noidx_res = index.join(other_mono, how="outer")
|
|
tm.assert_index_equal(res, noidx_res)
|
|
|
|
elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp)
|
|
eridx = np.array(
|
|
[-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp
|
|
)
|
|
assert isinstance(res, Index) and res.dtype == np.int64
|
|
tm.assert_index_equal(res, eres)
|
|
tm.assert_numpy_array_equal(lidx, elidx)
|
|
tm.assert_numpy_array_equal(ridx, eridx)
|
|
|
|
|
|
class TestJoinUInt64Index:
    """Exercise ``Index.join`` on uint64 indexes for each ``how`` mode.

    All values sit at or above ``2**63``, i.e. outside the int64 range.
    """

    @pytest.fixture
    def index_large(self):
        """Return a five-element uint64 index starting at ``2**63``."""
        # large values used in TestUInt64Index where no compat needed with int64/float64
        large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25]
        return Index(large, dtype=np.uint64)

    def test_join_inner(self, index_large):
        """Inner join keeps only the shared labels and returns both indexers."""
        other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64"))
        other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64"))

        # not monotonic
        res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True)

        # no guarantee of sortedness, so sort for comparison purposes
        ind = res.argsort()
        res = res.take(ind)
        lidx = lidx.take(ind)
        ridx = ridx.take(ind)

        eres = Index(2**63 + np.array([10, 25], dtype="uint64"))
        elidx = np.array([1, 4], dtype=np.intp)
        eridx = np.array([5, 2], dtype=np.intp)

        # separate asserts so a failure shows which property is wrong
        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

        # monotonic
        res, lidx, ridx = index_large.join(
            other_mono, how="inner", return_indexers=True
        )

        res2 = index_large.intersection(other_mono)
        tm.assert_index_equal(res, res2)

        elidx = np.array([1, 4], dtype=np.intp)
        eridx = np.array([3, 5], dtype=np.intp)

        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_left(self, index_large):
        """Left join returns the left index; ``lidx`` is None when it is unchanged."""
        other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64"))
        other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64"))

        # not monotonic
        res, lidx, ridx = index_large.join(other, how="left", return_indexers=True)
        eres = index_large
        eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp)

        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        assert lidx is None
        tm.assert_numpy_array_equal(ridx, eridx)

        # monotonic
        res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True)
        eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp)

        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        assert lidx is None
        tm.assert_numpy_array_equal(ridx, eridx)

        # non-unique
        idx = Index(2**63 + np.array([1, 1, 2, 5], dtype="uint64"))
        idx2 = Index(2**63 + np.array([1, 2, 5, 7, 9], dtype="uint64"))
        res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True)

        # 2**63 + 1 occurs twice in idx (the right operand), so it is
        # repeated in the result
        eres = Index(2**63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64"))
        eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
        elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)

        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_right(self, index_large):
        """Right join returns the right index; ``ridx`` is None when it is unchanged.

        The type and dtype checks target the join result, not the input.
        """
        other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64"))
        other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64"))

        # not monotonic
        res, lidx, ridx = index_large.join(other, how="right", return_indexers=True)
        eres = other
        elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp)

        tm.assert_numpy_array_equal(lidx, elidx)
        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        assert ridx is None

        # monotonic
        res, lidx, ridx = index_large.join(
            other_mono, how="right", return_indexers=True
        )
        eres = other_mono
        elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp)

        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_index_equal(res, eres)
        assert ridx is None

        # non-unique
        idx = Index(2**63 + np.array([1, 1, 2, 5], dtype="uint64"))
        idx2 = Index(2**63 + np.array([1, 2, 5, 7, 9], dtype="uint64"))
        res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True)

        # 2**63 + 1 occurs twice in idx (the left operand), so it is
        # repeated in the result
        eres = Index(2**63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64"))
        elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
        eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)

        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_non_int_index(self, index_large):
        """Join a uint64 index with an object-dtype index, in both directions."""
        other = Index(
            2**63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object
        )

        outer = index_large.join(other, how="outer")
        outer2 = other.join(index_large, how="outer")
        expected = Index(
            2**63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64")
        )
        tm.assert_index_equal(outer, outer2)
        tm.assert_index_equal(outer, expected)

        inner = index_large.join(other, how="inner")
        inner2 = other.join(index_large, how="inner")
        expected = Index(2**63 + np.array([10, 20], dtype="uint64"))
        tm.assert_index_equal(inner, inner2)
        tm.assert_index_equal(inner, expected)

        left = index_large.join(other, how="left")
        tm.assert_index_equal(left, index_large.astype(object))

        left2 = other.join(index_large, how="left")
        tm.assert_index_equal(left2, other)

        right = index_large.join(other, how="right")
        tm.assert_index_equal(right, other)

        right2 = other.join(index_large, how="right")
        tm.assert_index_equal(right2, index_large.astype(object))

    def test_join_outer(self, index_large):
        """Outer join returns the sorted union and is the same with or without indexers."""
        other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64"))
        other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64"))

        # not monotonic
        # guarantee of sortedness
        res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True)
        noidx_res = index_large.join(other, how="outer")
        tm.assert_index_equal(res, noidx_res)

        eres = Index(
            2**63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64")
        )
        elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp)
        eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp)

        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

        # monotonic
        res, lidx, ridx = index_large.join(
            other_mono, how="outer", return_indexers=True
        )
        noidx_res = index_large.join(other_mono, how="outer")
        tm.assert_index_equal(res, noidx_res)

        elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp)
        eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp)

        assert isinstance(res, Index)
        assert res.dtype == np.uint64
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)
|