keshe/.venv/Lib/site-packages/pandas/tests/extension/base/dim2.py

"""
Tests for 2D compatibility.
"""
import numpy as np
import pytest

from pandas._libs.missing import is_matching_na

from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_integer_dtype,
)

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE


class Dim2CompatTests:
    # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
    #  i.e. not for pyarrow-backed EAs.

    @pytest.fixture(autouse=True)
    def skip_if_doesnt_support_2d(self, dtype, request):
        if not dtype._supports_2d:
            node = request.node
            # In cases where we are mixed in to ExtensionTests, we only want to
            #  skip tests that are defined in Dim2CompatTests
            test_func = node._obj
            if test_func.__qualname__.startswith("Dim2CompatTests"):
                # TODO: is there a less hacky way of checking this?
                pytest.skip(f"{dtype} does not support 2D.")

    def test_transpose(self, data):
        arr2d = data.repeat(2).reshape(-1, 2)
        shape = arr2d.shape
        assert shape[0] != shape[-1]  # otherwise the rest of the test is useless

        assert arr2d.T.shape == shape[::-1]

    def test_frame_from_2d_array(self, data):
        arr2d = data.repeat(2).reshape(-1, 2)

        df = pd.DataFrame(arr2d)
        expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
        tm.assert_frame_equal(df, expected)

    def test_swapaxes(self, data):
        arr2d = data.repeat(2).reshape(-1, 2)

        result = arr2d.swapaxes(0, 1)
        expected = arr2d.T
        tm.assert_extension_array_equal(result, expected)

    def test_delete_2d(self, data):
        arr2d = data.repeat(3).reshape(-1, 3)

        # axis = 0
        result = arr2d.delete(1, axis=0)
        expected = data.delete(1).repeat(3).reshape(-1, 3)
        tm.assert_extension_array_equal(result, expected)

        # axis = 1
        result = arr2d.delete(1, axis=1)
        expected = data.repeat(2).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

    def test_take_2d(self, data):
        arr2d = data.reshape(-1, 1)

        result = arr2d.take([0, 0, -1], axis=0)

        expected = data.take([0, 0, -1]).reshape(-1, 1)
        tm.assert_extension_array_equal(result, expected)

    def test_repr_2d(self, data):
        # this could fail in a corner case where an element contained the name
        res = repr(data.reshape(1, -1))
        assert res.count(f"<{type(data).__name__}") == 1

        res = repr(data.reshape(-1, 1))
        assert res.count(f"<{type(data).__name__}") == 1

    def test_reshape(self, data):
        arr2d = data.reshape(-1, 1)
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        arr2d = data.reshape((-1, 1))
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        with pytest.raises(ValueError):
            data.reshape((data.size, 2))
        with pytest.raises(ValueError):
            data.reshape(data.size, 2)

    def test_getitem_2d(self, data):
        arr2d = data.reshape(1, -1)

        result = arr2d[0]
        tm.assert_extension_array_equal(result, data)

        with pytest.raises(IndexError):
            arr2d[1]

        with pytest.raises(IndexError):
            arr2d[-2]

        result = arr2d[:]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, :]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, 0]
        expected = data[[0]]
        tm.assert_extension_array_equal(result, expected)

        # dimension-expanding getitem on 1D
        result = data[:, np.newaxis]
        tm.assert_extension_array_equal(result, arr2d.T)

    def test_iter_2d(self, data):
        arr2d = data.reshape(1, -1)

        objs = list(iter(arr2d))
        assert len(objs) == arr2d.shape[0]

        for obj in objs:
            assert isinstance(obj, type(data))
            assert obj.dtype == data.dtype
            assert obj.ndim == 1
            assert len(obj) == arr2d.shape[1]

    def test_tolist_2d(self, data):
        arr2d = data.reshape(1, -1)

        result = arr2d.tolist()
        expected = [data.tolist()]

        assert isinstance(result, list)
        assert all(isinstance(x, list) for x in result)

        assert result == expected

    def test_concat_2d(self, data):
        left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
        right = left.copy()

        # axis=0
        result = left._concat_same_type([left, right], axis=0)
        expected = data._concat_same_type([data] * 4).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

        # axis=1
        result = left._concat_same_type([left, right], axis=1)
        assert result.shape == (len(data), 4)
        tm.assert_extension_array_equal(result[:, :2], left)
        tm.assert_extension_array_equal(result[:, 2:], right)

        # axis > 1 -> invalid
        msg = "axis 2 is out of bounds for array of dimension 2"
        with pytest.raises(ValueError, match=msg):
            left._concat_same_type([left, right], axis=2)

    @pytest.mark.parametrize("method", ["backfill", "pad"])
    def test_fillna_2d_method(self, data_missing, method):
        # pad_or_backfill is always along axis=0
        arr = data_missing.repeat(2).reshape(2, 2)
        assert arr[0].isna().all()
        assert not arr[1].isna().any()

        result = arr._pad_or_backfill(method=method, limit=None)

        expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        tm.assert_extension_array_equal(result, expected)

        # Reverse so that backfill is not a no-op.
        arr2 = arr[::-1]
        assert not arr2[0].isna().any()
        assert arr2[1].isna().all()

        result2 = arr2._pad_or_backfill(method=method, limit=None)

        expected2 = (
            data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        )
        tm.assert_extension_array_equal(result2, expected2)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis_none(self, data, method):
        arr2d = data.reshape(1, -1)

        err_expected = None
        err_result = None
        try:
            expected = getattr(data, method)()
        except Exception as err:
            # if the 1D reduction is invalid, the 2D reduction should be as well
            err_expected = err
            try:
                result = getattr(arr2d, method)(axis=None)
            except Exception as err2:
                err_result = err2

        else:
            result = getattr(arr2d, method)(axis=None)

        if err_result is not None or err_expected is not None:
            assert type(err_result) == type(err_expected)
            return

        assert is_matching_na(result, expected) or result == expected

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    @pytest.mark.parametrize("min_count", [0, 1])
    def test_reductions_2d_axis0(self, data, method, min_count):
        if min_count == 1 and method not in ["sum", "prod"]:
            pytest.skip(f"min_count not relevant for {method}")

        arr2d = data.reshape(1, -1)

        kwargs = {}
        if method in ["std", "var"]:
            # pass ddof=0 so we get all-zero std instead of all-NA std
            kwargs["ddof"] = 0
        elif method in ["prod", "sum"]:
            kwargs["min_count"] = min_count

        try:
            result = getattr(arr2d, method)(axis=0, **kwargs)
        except Exception as err:
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        def get_reduction_result_dtype(dtype):
            # windows and 32bit builds will in some cases have int32/uint32
            #  where other builds will have int64/uint64.
            if dtype.itemsize == 8:
                return dtype
            elif dtype.kind in "ib":
                return NUMPY_INT_TO_DTYPE[np.dtype(int)]
            else:
                # i.e. dtype.kind == "u"
                return NUMPY_INT_TO_DTYPE[np.dtype("uint")]

        if method in ["sum", "prod"]:
            # std and var are not dtype-preserving
            expected = data
            if data.dtype.kind in "iub":
                dtype = get_reduction_result_dtype(data.dtype)
                expected = data.astype(dtype)
                assert dtype == expected.dtype

            if min_count == 0:
                fill_value = 1 if method == "prod" else 0
                expected = expected.fillna(fill_value)

            tm.assert_extension_array_equal(result, expected)
        elif method == "median":
            # std and var are not dtype-preserving
            expected = data
            tm.assert_extension_array_equal(result, expected)
        elif method in ["mean", "std", "var"]:
            if is_integer_dtype(data) or is_bool_dtype(data):
                data = data.astype("Float64")
            if method == "mean":
                tm.assert_extension_array_equal(result, data)
            else:
                tm.assert_extension_array_equal(result, data - data)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis1(self, data, method):
        arr2d = data.reshape(1, -1)

        try:
            result = getattr(arr2d, method)(axis=1)
        except Exception as err:
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        # not necessarily type/dtype-preserving, so weaker assertions
        assert result.shape == (1,)
        expected_scalar = getattr(data, method)()
        res = result[0]
        assert is_matching_na(res, expected_scalar) or res == expected_scalar


class NDArrayBacked2DTests(Dim2CompatTests):
    # More specific tests for NDArrayBackedExtensionArray subclasses

    def test_copy_order(self, data):
        # We should be matching numpy semantics for the "order" keyword in 'copy'
        arr2d = data.repeat(2).reshape(-1, 2)
        assert arr2d._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d.copy()
        assert res._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d[::2, ::2].copy()
        assert res._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d.copy("F")
        assert not res._ndarray.flags["C_CONTIGUOUS"]
        assert res._ndarray.flags["F_CONTIGUOUS"]

        res = arr2d.copy("K")
        assert res._ndarray.flags["C_CONTIGUOUS"]

        res = arr2d.T.copy("K")
        assert not res._ndarray.flags["C_CONTIGUOUS"]
        assert res._ndarray.flags["F_CONTIGUOUS"]

        # order not accepted by numpy
        msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
        with pytest.raises(ValueError, match=msg):
            arr2d.copy("Q")

        # neither contiguity
        arr_nc = arr2d[::2]
        assert not arr_nc._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc._ndarray.flags["F_CONTIGUOUS"]

        assert arr_nc.copy()._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc.copy()._ndarray.flags["F_CONTIGUOUS"]

        assert arr_nc.copy("C")._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc.copy("C")._ndarray.flags["F_CONTIGUOUS"]

        assert not arr_nc.copy("F")._ndarray.flags["C_CONTIGUOUS"]
        assert arr_nc.copy("F")._ndarray.flags["F_CONTIGUOUS"]

        assert arr_nc.copy("K")._ndarray.flags["C_CONTIGUOUS"]
        assert not arr_nc.copy("K")._ndarray.flags["F_CONTIGUOUS"]